From 97067e62116d9e18d0d325cfcb76e6406a8452db Mon Sep 17 00:00:00 2001 From: Xiuwen Zheng Date: Thu, 12 Sep 2024 15:54:03 -0500 Subject: [PATCH] fix seqVCF_Header() on ChromX --- DESCRIPTION | 4 ++-- NEWS | 11 +++++++++++ R/AddValue.R | 6 ++++-- R/ConvVCF2GDS.R | 19 ++++++++++++------- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ff85c2e..c030ec5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: SeqArray Type: Package Title: Data Management of Large-Scale Whole-Genome Sequence Variant Calls -Version: 1.44.1 -Date: 2024-06-22 +Version: 1.44.2 +Date: 2024-09-12 Depends: R (>= 3.5.0), gdsfmt (>= 1.31.1) Imports: methods, parallel, IRanges, GenomicRanges, GenomeInfoDb, Biostrings, S4Vectors diff --git a/NEWS b/NEWS index ed1764f..dcca8a6 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,14 @@ +CHANGES IN VERSION 1.44.2 +------------------------- + +BUG FIXES + + o fix `seqAddValue(, val=vector("list", NUM_VARIANT))` + + o fix the ploidy returned from `seqVCF_Header()`, when there are genotypes + of males and females on Chromosome X + + CHANGES IN VERSION 1.44.1 ------------------------- diff --git a/R/AddValue.R b/R/AddValue.R index d0a96b6..a9b2152 100644 --- a/R/AddValue.R +++ b/R/AddValue.R @@ -305,8 +305,10 @@ stopifnot(length(val) == nvar) val <- lapply(val, function(x) unlist(x, use.names=FALSE)) ns <- lengths(val) - n <- add.gdsn(node, nm, unlist(val, use.names=FALSE), - compress=compress, closezip=TRUE, replace=TRUE) + val <- unlist(val, use.names=FALSE) + if (is.null(val)) val <- logical() + n <- add.gdsn(node, nm, val, compress=compress, closezip=TRUE, + replace=TRUE) st <- if (packed.idx) .maxlen_bit_type(max(ns)) else "int" nidx <- add.gdsn(node, paste0("@", nm), ns, storage=st, compress=compress, closezip=TRUE, replace=TRUE, visible=FALSE) diff --git a/R/ConvVCF2GDS.R b/R/ConvVCF2GDS.R index aa7b048..c3f2815 100644 --- a/R/ConvVCF2GDS.R +++ b/R/ConvVCF2GDS.R @@ -234,17 +234,22 @@ seqVCF_Header <- function(vcf.fn, getnum=FALSE, verbose=TRUE) if (length(geno.text)) { - txt <- unlist(sapply(geno.text, function(s) { - scan(text=s, what=character(), sep=":", quiet=TRUE, nmax=1) }, - simplify=TRUE, USE.NAMES=FALSE)) + txt <- unlist(vapply(geno.text, function(s) { + scan(text=s, what="", sep=":", quiet=TRUE, nmax=1L) }, + "", USE.NAMES=FALSE)) if (any(grepl(",", txt, fixed=TRUE))) { ploidy <- NA_integer_ } else { - num <- sapply(strsplit(txt, "[|/]"), function(x) length(x) ) - num[txt %in% "."] <- NA_integer_ - tab <- table(num) - ploidy <- as.integer(names(which.max(tab))) + num <- lengths(strsplit(txt, "[|/]")) + ploidy <- max(num) + if (ploidy > 2L) + { + if (sum(num<=2L) >= sum(num>2L)) + ploidy <- 2L + else + ploidy <- ((ploidy+1L) %/% 2L) * 2L + } } } else ploidy <- NA_integer_