Skip to content

Commit

Permalink
Merge branch 'bacpop-186-v9-db-support' of https://github.com/bacpop/…
Browse files Browse the repository at this point in the history
…beebop_py into bacpop-205-fix-network
  • Loading branch information
absternator committed Dec 6, 2024
2 parents e0161cf + af7603d commit eb5799b
Show file tree
Hide file tree
Showing 14 changed files with 769 additions and 74 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/build_and_push.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# CI workflow: builds the beebop_py docker images and pushes them to GHCR.
# Reconstructed with conventional GitHub Actions indentation (the pasted copy
# had lost all leading whitespace, which is structural in YAML).
name: build and push docker images
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - "*"
env:
  # Branch name of the PR head if this is a pull_request event,
  # otherwise the pushed ref name (e.g. "main").
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
jobs:
  build-and-push:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Login to GHCR (GitHub Packages)
        # was garbled to "docker/#-action@v3" in the pasted copy; the step
        # name and GHCR registry confirm this is docker/login-action
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # add --with-dev to below commands to build & push the dev image
      - name: Build docker image
        run: ./docker/build
      - name: Push docker image
        run: ./docker/push
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,24 @@ Testing can be done in a second terminal (make sure to activate 'beebop_py') by
```
TESTING=True poetry run pytest
```

### Diagrams

- There is a .drawio graph in the `diagrams` folder illustrating the process of running an analysis. This includes
all the files created and how they are used in each job. You can open and view the diagram at [draw.io](https://draw.io).

## Use/Deploy specific version of PopPUNK

To use a specific version, commit or branch of PopPUNK in a beebop_py deployment, you can update `POPPUNK_VERSION` in `common`.

The new dev images built with `/docker/build --with-dev` will have a *-dev* suffix.

### Local Development

You can build the image with `/docker/build --with-dev`; this new image can now be used by Beebop.

### Deployment

A pull request can be created so GHA pushes the images to the docker hub. Add `--with-dev` to the build & push commands in `pipeline.yaml`.
**Ensure to remove the `--with-dev` flag before merging the PR.**
Then on the `beebop-deploy` the api image can be updated with the new dev image.
27 changes: 16 additions & 11 deletions beebop/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,18 +304,23 @@ def run_poppunk_internal(sketches: dict,
depends_on=job_assign, **queue_kwargs)
redis.hset("beebop:hash:job:network", p_hash, job_network.id)
# microreact
# delete all previous microreact cluster job results
# delete all previous microreact cluster job results for this project
redis.delete(f"beebop:hash:job:microreact:{p_hash}")
job_microreact = q.enqueue(visualise.microreact,
args=(p_hash,
fs,
full_db_fs,
args,
name_mapping,
species,
redis_host,
queue_kwargs),
depends_on=Dependency([job_assign, job_network], allow_failure=True), **queue_kwargs)
job_microreact = q.enqueue(
visualise.microreact,
args=(
p_hash,
fs,
full_db_fs,
args,
name_mapping,
species,
redis_host,
queue_kwargs,
),
depends_on=Dependency([job_assign, job_network], allow_failure=True),
**queue_kwargs,
)
redis.hset("beebop:hash:job:microreact", p_hash, job_microreact.id)
return jsonify(
response_success(
Expand Down
57 changes: 39 additions & 18 deletions beebop/assignClusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,10 @@ def handle_external_clusters(
config.external_clusters_prefix,
)
if not_found_query_names:
queries_names, queries_clusters, not_found_query_clusters = filter_queries(
queries_names, queries_clusters, not_found_query_names
queries_names, queries_clusters, not_found_query_clusters = (
filter_queries(
queries_names, queries_clusters, not_found_query_names
)
)
output_full_tmp = config.fs.output_tmp(config.p_hash)
not_found_query_names_new, not_found_query_clusters_new = (
Expand Down Expand Up @@ -241,15 +243,19 @@ def handle_not_found_queries(
"""
[Handles queries that were not found in the
initial external clusters file.
This function processes the sketches of the queries that were not found for external clusters from the reference db,
assigns clusters to them from the full db, and then summarizes the clusters. It also
This function processes the sketches of the queries that were not found
for external clusters from the reference db,
assigns clusters to them from the full db, and then
summarizes the clusters. It also
handles all file manipulations needed]
:param config: [ClusteringConfig with all necessary information]
:param sketches_dict: [dictionary with filehash (key) and sketch (value)]
:param sketches_dict: [dictionary
with filehash (key) and sketch (value)]
:param not_found_query_names: [list of sample hashes that were not found]
:param output_full_tmp: [path to temporary output directory]
:param not_found_query_clusters: [set of clusters assigned to initial not found samples]
:param not_found_query_clusters: [set of clusters assigned to
initial not found samples]
:return tuple[list, list]: [list initial not found sample hashes,
list of clusters assigned to initial not found samples]
"""
Expand Down Expand Up @@ -280,6 +286,7 @@ def handle_not_found_queries(

return query_names, query_clusters


def handle_files_manipulation(
config: ClusteringConfig,
output_full_tmp: str,
Expand All @@ -288,21 +295,28 @@ def handle_files_manipulation(
"""
[Handles file manipulations for queries that were not found in the
initial external clusters file.
This function copies include files from the full assign output directory
to the output directory, deletes include files for queries that were not found,
This function copies include files from the
full assign output directory
to the output directory, deletes include files for queries
that were not found,
and merges the partial query graph files.]
:param config: [ClusteringConfig with all necessary information]
:param output_full_tmp: [path to temporary output directory]
:param not_found_query_clusters: [set of clusters assigned to initial not found samples]
:param not_found_query_clusters: [set of clusters assigned
to initial not found samples]
"""
delete_include_files(
config.fs,
config.p_hash,
not_found_query_clusters,
)
copy_include_files(output_full_tmp, config.out_dir)
merge_txt_files(config.fs.partial_query_graph(config.p_hash), config.fs.partial_query_graph_tmp(config.p_hash))
merge_txt_files(
config.fs.partial_query_graph(config.p_hash),
config.fs.partial_query_graph_tmp(config.p_hash),
)


def update_external_clusters(
config: ClusteringConfig,
Expand All @@ -316,8 +330,10 @@ def update_external_clusters(
using the full database.
This function reads the external clusters from the
new previous query clustering file
and updates the initial external clusters file on ref db with the clusters for samples
that were initially not found, and have now been assigned by the current query with the full database.]
and updates the initial external clusters
file on ref db with the clusters for samples
that were initially not found, and have now been
assigned by the current query with the full database.]
:param config: [ClusteringConfig
with all necessary information]
Expand Down Expand Up @@ -399,19 +415,24 @@ def filter_queries(
:param queries_names: [list of sample hashes]
:param queries_clusters: [list of sample PopPUNK clusters]
:param not_found: [list of sample hashes that were not found]
:param config: [ClusteringConfig with all necessary information]
:param not_found: [list of sample hashes
that were not found]
:return tuple[list[str], list[str], set[str]]: [filtered sample hashes,
filtered sample PopPUNK clusters, set of clusters assigned to not found samples]
filtered sample PopPUNK clusters,
set of clusters assigned to not found samples]
"""
filtered_names = [name for name in queries_names if name not in not_found]
filtered_clusters = [
cluster
for name, cluster in zip(queries_names, queries_clusters)
if name not in not_found
]

return filtered_names, filtered_clusters, set(queries_clusters) - set(filtered_clusters)

return (
filtered_names,
filtered_clusters,
set(queries_clusters) - set(filtered_clusters),
)


def delete_include_files(
Expand Down
3 changes: 2 additions & 1 deletion beebop/visualise.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def microreact(
redis = Redis(host=redis_host)
# get results from previous job
current_job = get_current_job(redis)
assign_result = current_job.dependency.result # gets first dependency result (i.e assign_clusters)
# gets first dependency result (i.e assign_clusters)
assign_result = current_job.dependency.result
external_to_poppunk_clusters = None

try:
Expand Down
8 changes: 0 additions & 8 deletions buildkite/pipeline.yml

This file was deleted.

Loading

0 comments on commit eb5799b

Please sign in to comment.