Skip to content

Commit

Permalink
Use separate databases (source, anon, secret) for pytest
Browse files Browse the repository at this point in the history
  • Loading branch information
martinburchell committed Jan 2, 2025
1 parent 7deffd5 commit cbdb4ba
Show file tree
Hide file tree
Showing 26 changed files with 751 additions and 253 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/python-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,18 @@ jobs:
include:
# Just one python version for the other DB engines
- engine: mysql
scheme: mysql+mysqldb
port: 3306
python-version: "3.10"
- engine: sqlserver
scheme: mssql+pymssql
port: 1433
python-version: "3.10"
- engine: sqlserver
scheme: mssql+pyodbc
python-version: "3.10"
# - engine: postgres
# scheme: postgresql
# port: 5432
# python-version: "3.10"
steps:
Expand Down Expand Up @@ -55,4 +61,4 @@ jobs:
- name: Python tests
run: |
set -euxo pipefail
${GITHUB_WORKSPACE}/github_action_scripts/python_tests.sh ${{ matrix.engine }} ${{ matrix.port }}
${GITHUB_WORKSPACE}/github_action_scripts/python_tests.sh ${HOME}/venv/bin/pytest ${{ matrix.engine }} ${{ matrix.scheme }} ${{ matrix.port }}
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,6 @@ celerybeat.pid
.dir-locals.el

# Test database
crate_test.sqlite
crate_test*.sqlite

crate_anon/nlp_manager/compiled_nlp_classes/*
8 changes: 7 additions & 1 deletion crate_anon/anonymise/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
===============================================================================
The mere existence of this file makes Python treat the directory as a package.
Anonymisation package.
"""

from sqlalchemy import MetaData
from sqlalchemy.orm import declarative_base

# Declarative base class for the ORM models of the secret admin database
# (e.g. PatientInfo, TridRecord, OptOutPid and OptOutMpid in
# crate_anon.anonymise.models derive from it). It is created with its own
# MetaData instance.
# Access through SecretBase.metadata
SecretBase = declarative_base(metadata=MetaData())
4 changes: 2 additions & 2 deletions crate_anon/anonymise/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@
AnonymiseDatabaseSafeConfigKeys as SK,
DEFAULT_CHUNKSIZE,
DEFAULT_REPORT_EVERY,
DEMO_CONFIG,
HashConfigKeys as HK,
SEP,
)
from crate_anon.anonymise.demo_config import get_demo_config
from crate_anon.anonymise.dd import DataDictionary
from crate_anon.anonymise.scrub import (
NonspecificScrubber,
Expand Down Expand Up @@ -542,7 +542,7 @@ def __init__(
if RUNNING_WITHOUT_CONFIG or mock:
# Running in a mock environment; no config required
filename = None
fileobj = StringIO(DEMO_CONFIG)
fileobj = StringIO(get_demo_config())
else:
print(
f"You must set the {ANON_CONFIG_ENV_VAR} environment "
Expand Down
7 changes: 6 additions & 1 deletion crate_anon/anonymise/config_singleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,16 @@
"""

import os

from crate_anon.anonymise.config import Config
from crate_anon.common.constants import EnvVar


# =============================================================================
# Singleton config
# =============================================================================

config = Config()
mock = EnvVar.RUNNING_TESTS in os.environ

config = Config(mock=mock)
20 changes: 19 additions & 1 deletion crate_anon/anonymise/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@

ANON_CONFIG_ENV_VAR = "CRATE_ANON_CONFIG"


# =============================================================================
# Data dictionary
# =============================================================================
Expand All @@ -86,6 +85,16 @@

MAX_PID_STR = "9" * 10 # e.g. NHS numbers are 10-digit

# Better overall than string.maketrans:
# a 256-character lookup table in which position n holds chr(n), except that
# control characters (0-31), everything from DEL upwards (127-255) and the
# characters "(", ")", "/" and space are replaced by "_".
# Built in a single expression so that no scratch loop variables are left
# behind in the module namespace and the name is only ever bound to a str.
ODD_CHARS_TRANSLATE = "".join(
    "_" if (_n < 32 or _n >= 127 or chr(_n) in "()/ ") else chr(_n)
    for _n in range(256)
)

TridType = Integer
MAX_TRID = 2**31 - 1
# https://dev.mysql.com/doc/refman/5.0/en/numeric-type-overview.html
Expand Down Expand Up @@ -870,3 +879,12 @@ class HashConfigKeys:
# source2 = thing
# ... you can't have multiple keys with the same name.
# https://stackoverflow.com/questions/287757


class PatientInfoConstants:
    """
    Names used for the secret mapping table and its identifier fields.
    """

    # Table name
    SECRET_MAP_TABLENAME = "secret_map"

    # Field (column) names
    PID_FIELDNAME = "pid"
    MPID_FIELDNAME = "mpid"
    RID_FIELDNAME = "rid"
    MRID_FIELDNAME = "mrid"
    TRID_FIELDNAME = "trid"
9 changes: 5 additions & 4 deletions crate_anon/anonymise/make_demo_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,11 @@

from crate_anon.anonymise.constants import CHARSET

from crate_anon.testing import Base
from crate_anon.testing import SourceTestBase
from crate_anon.testing.factories import (
DemoFilenameDocFactory,
DemoPatientFactory,
SourceTestBaseFactory,
set_sqlalchemy_session_on_all_factories,
)
from crate_anon.testing.models import (
Expand Down Expand Up @@ -121,8 +122,8 @@ def mk_demo_database(
# 2. Create tables

log.info("Creating tables (dropping them first if required).")
Base.metadata.drop_all(engine, checkfirst=True)
Base.metadata.create_all(engine, checkfirst=True)
SourceTestBase.metadata.drop_all(engine, checkfirst=True)
SourceTestBase.metadata.create_all(engine, checkfirst=True)

# 3. Insert

Expand All @@ -132,7 +133,7 @@ def mk_demo_database(
f"words in notes."
)

set_sqlalchemy_session_on_all_factories(session)
set_sqlalchemy_session_on_all_factories(SourceTestBaseFactory, session)
log.info("Inserting data.")

total_words = 0
Expand Down
23 changes: 6 additions & 17 deletions crate_anon/anonymise/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,17 @@
from cardinal_pythonlib.sqlalchemy.orm_query import exists_orm
from sqlalchemy import (
Column,
MetaData,
Text,
)
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy.orm.session import Session

from crate_anon.anonymise import SecretBase
from crate_anon.anonymise.config_singleton import config
from crate_anon.anonymise.constants import (
MAX_TRID,
PatientInfoConstants,
TABLE_KWARGS,
TridType,
)
Expand All @@ -63,20 +63,9 @@
from crate_anon.anonymise.scrub import PersonalizedScrubber

log = logging.getLogger(__name__)
admin_meta = MetaData()
AdminBase = declarative_base(metadata=admin_meta)


class PatientInfoConstants:
    """
    Names used for the secret mapping table and its identifier fields.
    """

    # Table name
    SECRET_MAP_TABLENAME = "secret_map"

    # Field (column) names
    PID_FIELDNAME = "pid"
    MPID_FIELDNAME = "mpid"
    RID_FIELDNAME = "rid"
    MRID_FIELDNAME = "mrid"
    TRID_FIELDNAME = "trid"


class PatientInfo(AdminBase):
class PatientInfo(SecretBase):
"""
Represent patient information in the secret admin database.
Expand Down Expand Up @@ -217,7 +206,7 @@ def set_scrubber_info(self, scrubber: "PersonalizedScrubber") -> None:
self.tp_scrubber_text = None # type: Optional[str]


class TridRecord(AdminBase):
class TridRecord(SecretBase):
"""
Records the mapping from patient ID (PID) to integer transient research ID
(TRID), and makes new TRIDs as required.
Expand Down Expand Up @@ -286,7 +275,7 @@ def new_trid(cls, session: Session, pid: Union[int, str]) -> int:
session.rollback()


class OptOutPid(AdminBase):
class OptOutPid(SecretBase):
"""
Records the PID values of patients opting out of the anonymised database.
"""
Expand Down Expand Up @@ -342,7 +331,7 @@ def add(cls, session: Session, pid: Union[int, str]) -> None:
# https://stackoverflow.com/questions/12297156/fastest-way-to-insert-object-if-it-doesnt-exist-with-sqlalchemy # noqa


class OptOutMpid(AdminBase):
class OptOutMpid(SecretBase):
"""
Records the MPID values of patients opting out of the anonymised database.
"""
Expand Down
Loading

0 comments on commit cbdb4ba

Please sign in to comment.