-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathdesc-01-AAC.R
44 lines (40 loc) · 1.23 KB
/
desc-01-AAC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#' Amino Acid Composition Descriptor
#'
#' This function calculates the Amino Acid Composition descriptor (dim: 20),
#' i.e. the fraction of each of the 20 standard amino acids in the sequence.
#'
#' @param x A character vector, as the input protein sequence.
#'   Only the first element of \code{x} is split and counted.
#'
#' @return A length 20 named numeric vector. The names are the one-letter
#'   amino acid codes and each value is the count of that residue divided
#'   by the number of residues in the sequence.
#'
#' @author Nan Xiao <\url{https://nanx.me}>
#'
#' @seealso See \code{\link{extractDC}} and \code{\link{extractTC}}
#' for Dipeptide Composition and Tripeptide Composition descriptors.
#'
#' @export extractAAC
#'
#' @references
#' M. Bhasin, G. P. S. Raghava.
#' Classification of Nuclear Receptors Based on
#' Amino Acid Composition and Dipeptide Composition.
#' \emph{Journal of Biological Chemistry}, 2004, 279, 23262.
#'
#' @examples
#' x <- readFASTA(system.file("protseq/P00750.fasta", package = "protr"))[[1]]
#' extractAAC(x)
extractAAC <- function(x) {
  # Reject the input up front when protcheck() does not accept it.
  if (!protcheck(x)) {
    stop("x has unrecognized amino acid type")
  }

  # 20 Amino Acid Abbreviation Dictionary from
  # https://en.wikipedia.org/wiki/Amino_acid#Table_of_standard_amino_acid_abbreviations_and_properties
  AADict <- c(
    "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I",
    "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"
  )

  # Split the sequence into single characters. strsplit() returns one list
  # element per element of x; only the first one is used.
  residues <- strsplit(x, split = "")[[1]]

  # Fixing the factor levels to AADict keeps all 20 entries (zero counts
  # included) in dictionary order. maxsum = 21 is above the number of
  # levels, so summary() never folds levels into an "(Other)" bucket.
  #
  # Divide by the number of residues actually counted rather than nchar(x):
  # both agree for a single string, but nchar(x) is a vector when x has
  # several elements and would be recycled across the 20 counts.
  AAC <- summary(
    factor(residues, levels = AADict),
    maxsum = 21
  ) / length(residues)

  AAC
}