-
Notifications
You must be signed in to change notification settings - Fork 1
/
fig_1C.R
66 lines (52 loc) · 2.13 KB
/
fig_1C.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
col.pan <- brewer.pal(n = 9, name = "Set3")
setwd("~/genomes/IR.w.flanks.20kb/")
df <- read.table("rmask.out")
names(df) <- c("score", "div", "del", "ins",
"qseq", "qbegin", "qend", "qleft",
"strand", "repeat_id", "class",
"sbegin","send", "sleft")
df$sbegin <- gsub("\\(|\\)", "", df$sbegin)
df$sleft <- gsub("\\(|\\)", "", df$sleft)
df$sbegin <- as.numeric(df$sbegin)
df$send <- as.numeric(df$send)
df$sleft <- as.numeric(df$sleft)
df$sum <- df$sbegin + df$send + df$sleft
df$perc <- 100*((df$send - df$sbegin)/df$sum)
df <- data.frame(df)
df$class <- as.character(df$class)
unique(df$class)
#myb.3 <- 20001
#myb.5 <- 22879
#ran.3 <- 143218
#ran.5 <- 192819
df[grepl("DNA", df$class),]$class <- "DNA"
df[grepl("LINE", df$class),]$class <- "LINE"
df[grepl("LTR", df$class),]$class <- "LTR"
df[grepl("Simple", df$class),]$class <- "Satellite"
df[grepl("Low", df$class),]$class <- "Satellite"
df <- df[!grepl("Unknown", df$class),]
df$class
df$sum.inv <- ifelse(test = df$perc > 0, "Straight", "Inverted")
#df <- df[which(df$sum.inv == "Straight"),]
pdf("pic_final_div.pdf", width = 16, height = 12, family = "Helvetica")
ggplot(data = df) +
geom_segment(aes(x=qbegin,
xend=qend,
y=div,
yend=div,
color = class),
size = 1,
arrow = arrow(length = unit(0.6, "cm"),
ends = ifelse(df$sum.inv == "Straight", "first", "last"),
type = "closed",
angle = 30
)) +
theme_bw() +
# geom_segment(aes(x = myb.3, xend = myb.5, y = -3, yend = -3), size = 0.5) +
# geom_segment(aes(x = ran.3, xend = ran.5, y = -3, yend = -3), size = 0.5) +
# geom_hline(yintercept=0) +
# geom_text(aes(x = (myb.3+myb.5)/2, y = -10), label = "Myb") +
# geom_text(aes(x = (ran.3+ran.5)/2, y = -10), label = "Ranbp16") +
# scale_x_continuous(breaks = c(0,50000,100000,150000,200000), labels = c("0kb", "50kb","100kb","150kb","200kb"),name = "Genomic region") +
scale_y_continuous("Diversity")
dev.off()