From 7d17ce89a8862c6b4bd01b2b5f9bcc98adfb10bf Mon Sep 17 00:00:00 2001 From: Kyle Ferriter Date: Mon, 28 Aug 2023 12:57:21 -0400 Subject: [PATCH] Add docstring to _open_for_reading. Remove old comments. --- src/biocommons/seqrepo/fastadir/fastadir.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/biocommons/seqrepo/fastadir/fastadir.py b/src/biocommons/seqrepo/fastadir/fastadir.py index db93773..fa8fbb3 100644 --- a/src/biocommons/seqrepo/fastadir/fastadir.py +++ b/src/biocommons/seqrepo/fastadir/fastadir.py @@ -139,7 +139,6 @@ def fetch(self, seq_id, start=None, end=None): """fetch sequence by seq_id, optionally with start, end bounds """ - # INFO path: /Users/kferrite/dev/biocommons.seqrepo/seqrepo/2021-01-29/sequences/2016/0824/050304/1472014984.5124342.fa.bgz, seq_id: v_QTc1p-MUYdgrRv4LMT6ByXIOsdw3C_, start: None, end: None rec = self.fetch_seqinfo(seq_id) if self._writing and self._writing["relpath"] == rec["relpath"]: @@ -151,14 +150,10 @@ def fetch(self, seq_id, start=None, end=None): "path: %s, seq_id: %s, start: %s, end: %s", path, seq_id, start, end) - # fabgz = self._open_for_reading(path) - # return fabgz.fetch(seq_id, start, end) - with self._open_for_reading(path) as fabgz: seq = fabgz.fetch(seq_id, start, end) return seq - @functools.lru_cache(maxsize=SEQREPO_LRU_CACHE_MAXSIZE) def fetch_seqinfo(self, seq_id): """fetch sequence info by seq_id @@ -241,6 +236,12 @@ def _upgrade_db(self): @functools.lru_cache() def _open_for_reading(self, path): + """ + Opens a FabgzReader to path, wraps in a LockableFabgzReader for use in context managers. + Places it in an LRU cache so file is only opened once per FastaDir object. Caller must + lock the LockableFabgzReader or otherwise handle concurrent access if sharing between + in-process concurrent execution threads, such as asyncio (e.g. WSGI/ASGI web servers) + """ _logger.debug("Opening for reading: %s", path) if not os.path.exists(path): _logger.error("_open_for_reading path does not exist: %s", path)