-
Notifications
You must be signed in to change notification settings - Fork 0
/
RlineScriptV3.R
53 lines (43 loc) · 1.3 KB
/
RlineScriptV3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
library(gdsfmt)
library(SeqArray)
file.create("sample.txt")
output <- file("sample.txt")
open(output, "w")
f <- seqOpen("hapmap_r23a.gds")
sampleID = seqGetData(f, "sample.id")
returnData <- function(data) {
chrom = data[[1]]
pos = data[[2]]
alleles <- c(strsplit(data[[3]][1], ",")[[1]], ".")
genotype = data[[4]]
internalWrite <- function(alt, hom_het, id) {
writeLines(c(chrom, pos, alleles[1], alt, hom_het, id), output, sep = " ")
#formatting but may slow down process
writeLines("\n", output)
}
genotype[is.na(genotype)] = length(alleles) - 1
for (col in 1:ncol(genotype)) {
num1 = genotype[1, col] + 1
num2 = genotype[2, col] + 1
id = sampleID[col]
if (num1 == 0 & num2 == 0) {
#nothing to print
}
else if (num1 != 0 & num2 == 0) {
internalWrite(alleles[num1], "het", id)
}
else if (num1 == 0 & num2 != 0) {
internalWrite(alleles[num2], "het", id)
}
else if (num1 == num2) {
internalWrite(alleles[num1], "hom", id)
}
else {
internalWrite(alleles[num1], "het", id)
internalWrite(alleles[num2], "het", id)
}
}
}
seqApply(f, c("chromosome", "position", "allele", "genotype"), FUN= returnData, margin="by.variant")
seqClose(f)
close(output)