-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMarker Clustering.Rmd
76 lines (54 loc) · 2.33 KB
/
Marker Clustering.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
---
title: "Clustering"
author: "Chris Lin"
date: "4/23/2020"
output: word_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(Seurat)
library(MAST)
library(ggplot2)
```
```{r 1}
cells <- readRDS("/projectnb/bf528/users/group3/project4/mydata.rds")
```
```{r 2}
cells.markers <- FindAllMarkers(cells, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
cells.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
top_markers <- cells.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
write.csv(top_markers,"top_DE_genes.csv")
```
```{r 3}
database <- read.table('/projectnb/bf528/users/group3/project4/PanglaoDB_markers_27_Mar_2020.tsv', sep = '\t', header=TRUE, stringsAsFactors = FALSE)
cells.markers$cell_type <- NA
cells.markers$cannonical <- NA
cells.markers$ubiquitous_index <- NA
cells.markers$specificity <- NA
for (row in 1:nrow(cells.markers)) {
gene <- cells.markers[row, "gene"]
if (gene %in% database$official.gene.symbol)
indicies <- which(database$official.gene.symbol == gene)
cells.markers$cell_type[row] <- database[indicies[1], 3]
cells.markers$cannonical[row] <- database[indicies[1], 8]
cells.markers$ubiquitous_index[row] <- database[indicies[1], 5]
cells.markers$specificity[row] <- database[indicies[1], 13]
}
cells.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
```
```{r 3}
new.cluster.ids <- c('Alpha', 'Alpha', 'Beta', 'Delta', 'Hepatic stellate', 'Ductal', 'Acinar', 'Gamma', 'Endothelial', 'Acinar')
names(new.cluster.ids) <- levels(cells)
cells <- RenameIdents(cells, new.cluster.ids)
DimPlot(cells, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()
```
```{r fig.height = 10, fig.width = 15}
top10 <- cells.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
DoHeatmap(cells, features = top10$gene, disp.min = -2.5, disp.max = 2.5, size = 5, angle = 90) + theme(axis.text.y = element_text(size = 5)) + theme(axis.title.x = element_text(size = 2))
```
```{r }
novel_markers <- cells.markers %>% group_by(cluster) %>% arrange(desc(avg_logFC), .by_group = TRUE)
novel_markers <- novel_markers[ which(novel_markers$p_val_adj < 0.05 & novel_markers$specificity < 0.1 & novel_markers$avg_logFC > 0.75),]
novel_markers <- novel_markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_logFC)
```