diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..21638c1 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,24 @@ +name: CI + +on: [push, pull_request] + +jobs: + style: + name: Check style + runs-on: 'ubuntu-latest' + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + + - name: Install dependencies + run: python3 -m pip install ".[dev]" + + - name: Ruff checks + run: python3 -m ruff check . + + - name: isort checks + run: python3 -m isort --check . diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..5ba9d71 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: +- repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.244 + hooks: + - id: ruff +- repo: https://github.com/pycqa/isort + rev: 5.6.4 + hooks: + - id: isort + args: ["--check"] diff --git a/.vscode/launch.json b/.vscode/launch.json index e1e5ce0..2a01a59 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -18,4 +18,4 @@ "module": "anyvar.restapi" } ] -} \ No newline at end of file +} diff --git a/.vscode/settings.json b/.vscode/settings.json index 2b7e46d..371c111 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,3 @@ { "python.formatting.provider": "yapf" -} \ No newline at end of file +} diff --git a/bin/cv-load-timing b/bin/cv-load-timing index 593c892..1477d8a 100755 --- a/bin/cv-load-timing +++ b/bin/cv-load-timing @@ -4,20 +4,19 @@ import logging import os import sys -from biocommons.seqrepo import SeqRepo -from bioutils.exceptions import BioutilsError import coloredlogs import redis +import tqdm +from biocommons.seqrepo import SeqRepo +from bioutils.exceptions import BioutilsError from ga4gh.vrs import models from ga4gh.vrs.dataproxy import SeqRepoDataProxy from hgvs.exceptions import HGVSParseError -import tqdm from anyvar import AnyVar -from anyvar.translator import Translator from anyvar.extras.clinvarparser import ClinvarParser from anyvar.storage.redisobjectstore import RedisObjectStore - +from anyvar.translator import Translator _logger = logging.getLogger(__name__) @@ -30,7 +29,7 @@ def firstn(gen, n=None): yield e if i == n: break - + if __name__ == "__main__": @@ -55,7 +54,10 @@ if __name__ == "__main__": n_cvrec = 0 n_exc = 0 for cvrec in pbar: - #_logger.info(f"{cvrec.accession}; {cvrec.record_type}; {len(cvrec.hgvs_expressions)} hgvs expressions") + #_logger.info( + # f"{cvrec.accession}; {cvrec.record_type}; {len(cvrec.hgvs_expressions)} " + # "hgvs expressions" + # ) n_cvrec += 1 n_hgvs += len(cvrec.hgvs_expressions) if n_cvrec % 10 == 0: @@ -63,9 +65,11 @@ if __name__ == "__main__": for he in cvrec.hgvs_expressions: try: v = tlr.from_hgvs(he) - except (ValueError, HGVSParseError, AttributeError, KeyError, BioutilsError): + except ( + ValueError, HGVSParseError, AttributeError, KeyError, BioutilsError + ): v = models.Text(definition=he) _id = av.put_object(v) - #_logger.info(f"stored {_id} for {he}") + _logger.info(f"stored {_id} for {he}") _logger.info(f"{n_hgvs} hgvs expressions in {n_cvrec} clinvar records") diff --git a/docker-build b/docker-build index 7473cb8..3301ec2 100755 --- a/docker-build +++ b/docker-build @@ -10,5 +10,5 @@ tag=$(git describe --tags) e docker build -t $imagename:$tag "$@" . e docker push $imagename:$tag -e docker tag $imagename:$tag $imagename:latest -e docker push $imagename:latest +e docker tag $imagename:$tag $imagename:latest +e docker push $imagename:latest diff --git a/docker-compose.yml b/docker-compose.yml index d9af77f..349d515 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,7 +18,7 @@ services: - seqrepo-rest-service # - uta #environment: - # # UTA_DB_URL: + # # UTA_DB_URL: # # SEQREPO_REST_SERVICE_URL: redis: @@ -28,7 +28,7 @@ services: volumes: - anyvar_redis_vol:/data - seqrepo-rest-service: + seqrepo-rest-service: # Test: curl http://localhost:5000/seqrepo/1/sequence/refseq:NM_000551.3 expose: - 5000 @@ -37,7 +37,7 @@ services: image: biocommons/seqrepo-rest-service:latest volumes: - seqrepo_vol:/usr/local/share/seqrepo - + # uta: # # Test: # # psql -XAt postgres://anonymous@localhost/uta -c 'select count(*) from transcript' diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4a01f39 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,16 @@ +[tool.ruff] +exclude = [ + "venv", + "build", + "dist", + "tests" +] + +line-length = 88 + +[tool.ruff.per-file-ignores] +"__init__.py" = ["F401", "E402"] +"uidoc.py" = ["E501"] + +[tool.isort] +group_by_package = true diff --git a/setup.cfg b/setup.cfg index 554afe5..0e1c1e8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,10 +32,13 @@ exclude = [options.extras_require] dev = - ipython + ruff + pre-commit + isort test = pytest pytest-cov + pytest-mock [aliases] @@ -47,12 +50,5 @@ universal = 1 [build_sphinx] all_files = 1 -# http://pep8.readthedocs.org/en/latest/intro.html#error-codes -[flake8] -max-line-length = 120 -exclude = tests/* -max-complexity = 10 -ignore = E129,E221,E241,E251,E303,W291 - [tool:pytest] addopts = --cov=anyvar --cov-report term-missing diff --git a/src/anyvar/anyvar.py b/src/anyvar/anyvar.py index f40dc16..6d54a09 100644 --- a/src/anyvar/anyvar.py +++ b/src/anyvar/anyvar.py @@ -3,22 +3,23 @@ """ -from collections.abc import MutableMapping import logging +from collections.abc import MutableMapping from ga4gh.core import ga4gh_identify from ga4gh.vrs import models, vrs_deref, vrs_enref from anyvar.translate.translate import _Translator - _logger = logging.getLogger(__name__) class AnyVar: def __init__(self, /, translator: _Translator, object_store: MutableMapping): if not isinstance(object_store, MutableMapping): - _logger.warning("AnyVar(object_store=) should be a mutable mapping; you're on your own") + _logger.warning( + "AnyVar(object_store=) should be a mutable mapping; you're on your own" + ) self.object_store = object_store self.translator = translator diff --git a/src/anyvar/extras/clinvarparser.py b/src/anyvar/extras/clinvarparser.py index 7400802..3604f64 100644 --- a/src/anyvar/extras/clinvarparser.py +++ b/src/anyvar/extras/clinvarparser.py @@ -3,13 +3,13 @@ import lxml.etree as le - _logger = logging.getLogger() class VariationArchive: def __init__(self, element): - assert element.tag == "VariationArchive", "Expected node type `VariationArchive`" + assert element.tag == "VariationArchive", \ + "Expected node type `VariationArchive`" self._element = element @property @@ -39,7 +39,7 @@ def record_type(self): @property def variation_name(self): return self._element.get("VariationName") - + @property def variation_type(self): return self._element.get("VariationType") @@ -47,7 +47,7 @@ def variation_type(self): @property def version(self): return self._element.get("Version") - + @property def xrefs(self): return [e.attrib for e in self._element.xpath(".//XRefList/XRef")] @@ -75,4 +75,7 @@ def clinvar_open(fp): import sys fn = sys.argv[1] for va in clinvar_open(fn): - print(f"{va.acv}\t{va.variation_name}\t{va.variation_type}\t{len(va.hgvs_expressions)}\t{len(va.xrefs)}") + print( + f"{va.acv}\t{va.variation_name}\t{va.variation_type}\t" + f"{len(va.hgvs_expressions)}\t{len(va.xrefs)}" + ) diff --git a/src/anyvar/restapi/_data/openapi.yaml b/src/anyvar/restapi/_data/openapi.yaml index d29b3f0..7527858 100644 --- a/src/anyvar/restapi/_data/openapi.yaml +++ b/src/anyvar/restapi/_data/openapi.yaml @@ -615,4 +615,3 @@ components: length: 248956422 alphabet: ACGMNRT added: 2016-08-27T21:17:00Z - diff --git a/src/anyvar/restapi/globals.py b/src/anyvar/restapi/globals.py index b939c3c..14c66f6 100644 --- a/src/anyvar/restapi/globals.py +++ b/src/anyvar/restapi/globals.py @@ -8,10 +8,11 @@ from typing import Any, Callable, Optional from flask import current_app -from ga4gh.vrs.dataproxy import create_dataproxy, _DataProxy -from anyvar.translate.translate import TranslatorSetupException +from ga4gh.vrs.dataproxy import _DataProxy, create_dataproxy -from anyvar.translate.variation_normalizer import VariationNormalizerRestTranslator +from anyvar.translate.translate import TranslatorSetupException +from anyvar.translate.variation_normalizer import \ + VariationNormalizerRestTranslator from ..anyvar import AnyVar from ..storage import create_storage diff --git a/src/anyvar/restapi/routes/allele.py b/src/anyvar/restapi/routes/allele.py index 8535a09..1ca7423 100644 --- a/src/anyvar/restapi/routes/allele.py +++ b/src/anyvar/restapi/routes/allele.py @@ -15,7 +15,9 @@ def put(body): except TranslationException: result["messages"].append(f"Unable to translate {defn}") except NotImplementedError: - result["messages"].append(f"Variation class for {defn} is currently unsupported.") + result["messages"].append( + f"Variation class for {defn} is currently unsupported." + ) else: v_id = av.put_object(v) result["object"] = v.as_dict() diff --git a/src/anyvar/restapi/routes/find_alleles.py b/src/anyvar/restapi/routes/find_alleles.py index 602868d..ecefb30 100644 --- a/src/anyvar/restapi/routes/find_alleles.py +++ b/src/anyvar/restapi/routes/find_alleles.py @@ -1,7 +1,7 @@ -from connexion import NoContent +from anyvar.storage.postgres import PostgresObjectStore from ..globals import get_anyvar -from anyvar.storage.postgres import PostgresObjectStore + def get_ga4gh_alias(seqrepo_data_proxy, accession): md = seqrepo_data_proxy.get_metadata(accession) diff --git a/src/anyvar/restapi/routes/info.py b/src/anyvar/restapi/routes/info.py index 2f4004d..86f8d2b 100644 --- a/src/anyvar/restapi/routes/info.py +++ b/src/anyvar/restapi/routes/info.py @@ -1,8 +1,6 @@ -from connexion import NoContent - -import anyvar import ga4gh.vrs +import anyvar def search(): diff --git a/src/anyvar/restapi/routes/sequence.py b/src/anyvar/restapi/routes/sequence.py index bedef63..f1f9f3f 100644 --- a/src/anyvar/restapi/routes/sequence.py +++ b/src/anyvar/restapi/routes/sequence.py @@ -1,7 +1,7 @@ import logging from ..globals import get_dataproxy -from .utils import get_sequence_ids, problem +from .utils import problem _logger = logging.getLogger(__name__) @@ -12,4 +12,3 @@ def get(alias, start=None, end=None): return problem(422, "Invalid coordinates: start > end") dp = get_dataproxy() return dp.get_sequence(alias, start, end), 200 - diff --git a/src/anyvar/restapi/routes/sequence_metadata.py b/src/anyvar/restapi/routes/sequence_metadata.py index 21ad44a..3baf7b9 100644 --- a/src/anyvar/restapi/routes/sequence_metadata.py +++ b/src/anyvar/restapi/routes/sequence_metadata.py @@ -1,10 +1,6 @@ import logging -from connexion import NoContent - from ..globals import get_dataproxy -from .utils import get_sequence_ids, problem - _logger = logging.getLogger(__name__) diff --git a/src/anyvar/restapi/routes/summary_statistics.py b/src/anyvar/restapi/routes/summary_statistics.py index 40fdd57..845ad6c 100644 --- a/src/anyvar/restapi/routes/summary_statistics.py +++ b/src/anyvar/restapi/routes/summary_statistics.py @@ -1,5 +1,6 @@ from ..globals import get_anyvar + def get(vartype): av = get_anyvar() if vartype == "substitution": diff --git a/src/anyvar/restapi/routes/text.py b/src/anyvar/restapi/routes/text.py index b99a120..b62ade6 100644 --- a/src/anyvar/restapi/routes/text.py +++ b/src/anyvar/restapi/routes/text.py @@ -1,5 +1,3 @@ -from connexion import NoContent - from ..globals import get_anyvar @@ -8,7 +6,7 @@ def put(body): request = body defn = request.pop("definition") v = av.create_text(defn) - id = av.put_object(v) + av.put_object(v) result = { "object": v.as_dict(), "messages": [], diff --git a/src/anyvar/restapi/routes/utils.py b/src/anyvar/restapi/routes/utils.py index 426a1d7..df46867 100644 --- a/src/anyvar/restapi/routes/utils.py +++ b/src/anyvar/restapi/routes/utils.py @@ -1,16 +1,14 @@ # taken from seqrepo-rest-service/src/seqrepo_rest_service/utils.py +import logging +import re from base64 import urlsafe_b64decode, urlsafe_b64encode from binascii import hexlify, unhexlify from http.client import responses as http_responses -import logging -import re import connexion - from bioutils.accessions import infer_namespaces - _logger = logging.getLogger(__name__) @@ -37,12 +35,12 @@ def get_sequence_id(sr, query): * A fully-qualified sequence alias (e.g., VMC:0123 or refseq:NM_01234.5) * A digest or digest prefix from VMC, TRUNC512, or MD5 * A sequence accession (without namespace) - + Returns None if not found; seq_id if only one match; raises - RuntimeError for ambiguous matches. + RuntimeError for ambiguous matches. """ - + seq_ids = get_sequence_ids(sr, query) if len(seq_ids) == 0: _logger.warning(f"No sequence found for {query}") @@ -59,7 +57,7 @@ def get_sequence_ids(sr, query): * A fully-qualified sequence alias (e.g., VMC:0123 or refseq:NM_01234.5) * A digest or digest prefix from VMC, TRUNC512, or MD5 * A sequence accession (without namespace) - + The first match will be returned. """ @@ -73,7 +71,9 @@ def get_sequence_ids(sr, query): def problem(status, message): - return connexion.problem(status=status, title=http_responses[status], detail=message) + return connexion.problem( + status=status, title=http_responses[status], detail=message + ) @@ -105,7 +105,7 @@ def _generate_nsa_options(query): if namespaces: nsa_options = [(ns, query) for ns in namespaces] return nsa_options - + # if hex, try md5 and TRUNC512 if re.match(r"^(?:[0-9A-Fa-f]{8,})$", query): nsa_options = [("MD5", query + "%")] diff --git a/src/anyvar/restapi/routes/variation.py b/src/anyvar/restapi/routes/variation.py index 7eef98c..ed5f4e8 100644 --- a/src/anyvar/restapi/routes/variation.py +++ b/src/anyvar/restapi/routes/variation.py @@ -5,7 +5,7 @@ def put(body): av = get_anyvar() - translator = vm.translator + translator = av.translator request = body @@ -34,4 +34,3 @@ def get(id): return v.as_dict(), 200 except KeyError: return NoContent, 404 - diff --git a/src/anyvar/restapi/routes/variationset.py b/src/anyvar/restapi/routes/variationset.py index 6b472ab..09472c7 100644 --- a/src/anyvar/restapi/routes/variationset.py +++ b/src/anyvar/restapi/routes/variationset.py @@ -19,16 +19,15 @@ def put(body): vo = models.VariationSet(members=defn["member_ids"]) vo._id = ga4gh_identify(vo) av.put_object(vo) - + result = { "messages": messages, "data": vo.as_dict(), } - + return result, 200 def get(id): av = get_anyvar() return av.get_object(id).as_dict(), 200 - diff --git a/src/anyvar/restapi/uidoc.py b/src/anyvar/restapi/uidoc.py index c7f5909..af919d9 100644 --- a/src/anyvar/restapi/uidoc.py +++ b/src/anyvar/restapi/uidoc.py @@ -26,12 +26,9 @@
-