forked from levisimons/CALeDNA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathZeta4eDNA.R
71 lines (64 loc) · 3.19 KB
/
Zeta4eDNA.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# /usr/usc/R/3.4.4/bin/Rscript to run on the USC cluster
require(zetadiv)
require(plyr)
setwd("~/Desktop/CALeDNA") #Change on the cluster to ~/panfs/CALeDNA
fileList <- list.files(pattern="asv_deco_dedup") #Get all of the metagenomic data tables to calculate zeta_4 diversity for given areas.
metadata <- read.table("Final_metadata.csv", header=TRUE, sep=",",as.is=T,skip=0,fill=TRUE,check.names=FALSE) #CALeDNA site metadata.
#Add in the number of samples per location in the metadata.
locationFrequency <- as.data.frame(table(metadata$loc))
colnames(locationFrequency) <- c("loc","Freq")
metadata <- join(metadata,locationFrequency,by=c("loc"))
zetaLocation <- data.frame()
zetaNum=4
for(file in fileList){
#print(file)
#Read in OTU table.
DataRaw <- read.table(file, header=TRUE, sep=",",as.is=T,skip=0,fill=TRUE,check.names=FALSE)
#Get primer name.
primer <- strsplit(file,"_")
primer <- primer[[1]][length(primer[[1]])]
primer <- strsplit(primer,".csv")
primer <- primer[[1]][1]
#Get OTU names
taxa <- DataRaw[,1]
#Transpose the OTU table and merge it with the sample metadata
DataT <- as.data.frame(t(DataRaw[,-c(1)]))
colnames(DataT) <- taxa
DataT$MatchName <- rownames(DataT)
DataPlus <-join(DataT,metadata,by=c("MatchName"))
#Filter out locations with less than 4 samples.
DataPlus <- subset(DataPlus,Freq>=zetaNum)
for(location in unique(DataPlus$loc)){
#print(paste(file,location))
DataLocationSubset <- subset(DataPlus, loc==location)
#Now remove metadata elements.
clipColumn <- which(colnames(DataLocationSubset)=="MatchName")
DataLocationSubset <- DataLocationSubset[,-c(clipColumn:ncol(DataLocationSubset))]
#Make a presence/absence matrix
DataLocationSubset[DataLocationSubset > 0] <- 1
#Calculate zeta decay parameters and a particular order of zeta.
zetaDecayScaled <- Zeta.decline.ex(DataLocationSubset,order=1:zetaNum,rescale=TRUE,plot=FALSE)
zetaDecay <- Zeta.decline.ex(DataLocationSubset,order=1:zetaNum,rescale=FALSE,plot=FALSE)
dat <- data.frame()
dat[1,1] <- file
dat[1,2] <- location
dat[1,3] <- primer
dat[1,4] <- zetaDecay$zeta.val[zetaNum]
dat[1,5] <- zetaDecay$zeta.val.sd[zetaNum]
dat[1,6] <- zetaDecayScaled$zeta.val[zetaNum]
dat[1,7] <- zetaDecayScaled$zeta.val.sd[zetaNum]
dat[1,8] <- zetaDecay$zeta.exp$coefficients[1]
dat[1,9] <- zetaDecay$zeta.exp$coefficients[2]
dat[1,10] <- zetaDecay$aic$AIC[1]
dat[1,11] <- zetaDecay$zeta.pl$coefficients[1]
dat[1,12] <- zetaDecay$zeta.pl$coefficients[2]
dat[1,13] <- zetaDecay$aic$AIC[2]
zetaLocation <- rbind(zetaLocation,dat)
print(dat)
}
}
colnames(zetaLocation) <- c("file","location","primer",paste("zeta",zetaNum,sep=""),paste("zeta",zetaNum,"sd",sep=""),paste("zeta",zetaNum,"scaled",sep=""),paste("zeta",zetaNum,"sdscaled",sep=""),"ExpIntercept","ExpExp","ExpAIC","PLIntercept","PLExp","PLAIC")
zetaLocation[is.na(zetaLocation)] <- NA
#write.table(zetaLocation,"CALeDNAZetaDecay.txt",quote=FALSE,sep="\t",row.names = FALSE)
#If the file CALeDNAZetaDecay.txt was already generated on the cluster read it in here.
zetaLocation <- read.table("CALeDNAZetaDecay.txt", header=TRUE, sep="\t",as.is=T,skip=0,fill=TRUE,check.names=FALSE)