From 6be62b66daba31c20d58c2b1259538da718b9cbb Mon Sep 17 00:00:00 2001
From: anegm98
Date: Wed, 25 Nov 2020 10:07:13 +0200
Subject: [PATCH] add different enlargement modes for small proteins in CoMatrices

---
Review notes (ignored by `git am`; not part of the commit message):

* Line breaks and indentation of this patch were reconstructed; context-line
  whitespace is a best guess, so apply with `git apply --ignore-whitespace`
  if a hunk is rejected. Post-image blob hashes on the `index` lines are
  stale because the added lines changed.
* -e/--enlargenMode now defaults to "pad" (was '0', which matched no mode and
  left `x` unbound for every protein smaller than dsize).
* "repeat" pads the repeated array instead of the original one.
* "tile" uses 1 (not 0) repetitions on the first axis, so the result is no
  longer empty.
* A protein whose side already equals dsize is passed through instead of
  hitting an unbound `x`.
* Both cv2 resize paths reshape to (L, L, 3) before calling resize(), as the
  removed code did, and return that result directly.
* AAcomptability.__init__ default dsize is 500: the body builds
  (dsize, dsize), so the old tuple default produced a nested tuple.
* An unknown mode raises ValueError.
* NOTE(review): reshape((L, L, 3)) on a (3, L, L) array reinterprets the
  buffer rather than moving the channel axis; kept as in 1.3.0 - confirm
  this is intended (np.moveaxis would be the axis-moving alternative).
* NOTE(review): co_resize uses `np`; the import block of coMatrix.py is
  outside this patch's context - confirm `import numpy as np` is present.

 protencoder/cli.py      |  7 ++++++-
 protencoder/coMatrix.py | 35 ++++++++++++++++++++++++++++-------
 setup.py                |  2 +-
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/protencoder/cli.py b/protencoder/cli.py
index ee8287d..54e1975 100644
--- a/protencoder/cli.py
+++ b/protencoder/cli.py
@@ -68,6 +68,10 @@ def main():
     parser.add_argument("-M", "--method", default="o",
                         help="protein encoding method; o: (defult)onehot,\
                         k: kmers frequency, c: compatibility matrices")
+    parser.add_argument("-e", "--enlargenMode", default="pad",
+                        help="mode to enlarge a small protein in method "
+                             "compatibility matrices; pad(default), resize, "
+                             "tile, repeat")
     parser.add_argument("-k", "--kmerLength", default="3",
                         help="kmer length in frequency encoder")
     parser.add_argument("-f", "--Protfilter", default="",
@@ -119,6 +123,7 @@ def main():
     dsize = int(args.dsize)
     flatten = bool(int(args.flatten))
     PVmodel = args.PVmodelPath
+    action = args.enlargenMode
 
     if not (GOfile is None):
         outPrefix = args.outPrefix if args.outPrefix != "" else GOfile
@@ -137,7 +142,7 @@ def main():
     elif method == 'k':
         encoder = protKmers(k)
     elif method == 'c':
-        encoder = AAcomptability(dsize)
+        encoder = AAcomptability(dsize, action)
     elif method == 'p':
         encoder = protvec(PVmodel, flatten)
     if Protfilter != "":
diff --git a/protencoder/coMatrix.py b/protencoder/coMatrix.py
index 039bc4f..37e4f2c 100644
--- a/protencoder/coMatrix.py
+++ b/protencoder/coMatrix.py
@@ -4,9 +4,10 @@
 
 
 class AAcomptability():
-    def __init__(self, dsize=(500, 500)):
+    def __init__(self, dsize=500, action='pad'):
         self.handler = encoder()
         self.dsize = (dsize, dsize)
+        self.action = action
         self.SCM, self.HCM, self.CCM = get_data()
         self.matrices = [self.SCM, self.HCM, self.CCM]
 
@@ -29,12 +30,32 @@ def encode(self):
             self.handler.seqDict[prot] = encoded.astype('uint8')
 
     def co_resize(self, prot):
-        if prot.shape[0] < self.dsize[0]:
-            interpolation = INTER_LINEAR
-        elif prot.shape[0] > self.dsize[0]:
-            interpolation = INTER_AREA
-        prot = prot.reshape((prot.shape[1], prot.shape[2], 3))
-        x = resize(prot, self.dsize, interpolation=interpolation)
+        size, target = prot.shape[1], self.dsize[0]
+        if size > target or (size < target and self.action == "resize"):
+            # cv2.resize needs the three matrices on the last axis
+            interpolation = INTER_AREA if size > target else INTER_LINEAR
+            x = prot.reshape((prot.shape[1], prot.shape[2], 3))
+            return resize(x, self.dsize, interpolation=interpolation)
+        if size == target:
+            x = prot
+        elif self.action == "repeat":
+            repeatSize = target // size
+            x = np.repeat(prot, repeatSize, axis=1)
+            x = np.repeat(x, repeatSize, axis=2)
+            padSize = target - x.shape[1]
+            x = np.pad(x, ((0, 0), (0, padSize), (0, padSize)),
+                       mode="constant")
+        elif self.action == "tile":
+            tileSize = target // size + 1
+            x = np.tile(prot, (1, tileSize, tileSize))
+            x = x[:, :target, :target]
+        elif self.action == "pad":
+            padSize = target - size
+            x = np.pad(prot, ((0, 0), (0, padSize), (0, padSize)),
+                       mode="constant")
+        else:
+            raise ValueError("unknown enlargement mode: %r" % self.action)
+        x = x.reshape((x.shape[1], x.shape[2], 3))
         return x
 
     def read(self, seqPath):
diff --git a/setup.py b/setup.py
index 28685a7..b5bc0c1 100644
--- a/setup.py
+++ b/setup.py
@@ -49,7 +49,7 @@
     test_suite='tests',
     tests_require=test_requirements,
     url='https://github.com/anegm98/protencoder',
-    version='1.3.0',
+    version='1.4.0',
     zip_safe=False,
     package_data={'protencoder': ['data/*']}
 )