From 53a5423400b2c013b3c88f2c3fc6ddcb4664b42a Mon Sep 17 00:00:00 2001 From: Jennings Zhang Date: Fri, 4 Mar 2022 20:05:06 -0500 Subject: [PATCH] :elephant: --- .github/workflows/build.yml | 114 ++++++++++++++++++++++++++++++++++++ Dockerfile | 13 ++-- README.md | 87 ++++----------------------- app.py | 26 -------- bigfiles.py | 107 +++++++++++++++++++++++++++++++++ requirements.txt | 1 + setup.py | 14 ++--- 7 files changed, 244 insertions(+), 118 deletions(-) create mode 100644 .github/workflows/build.yml delete mode 100755 app.py create mode 100755 bigfiles.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..34cfd3d --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,114 @@ +# Automatically build multi-architectural tagged container images and push them to DockerHub +# https://github.com/FNNDSC/cookiecutter-chrisapp/wiki/Automatic-Builds + +name: build + +on: + push: + branches: [ main ] + tags: [ '**' ] + pull_request: + branches: [ main ] + +jobs: + publish: + if: github.event_name == 'push' || github.event_name == 'release' + runs-on: ubuntu-20.04 + + services: + registry: + image: registry:2 + ports: + - 5000:5000 + + steps: + - name: Get git tag + id: git_info + if: startsWith(github.ref, 'refs/tags/') + run: echo "::set-output name=tag::${GITHUB_REF##*/}" + - name: Decide image tag name + id: determine + env: + git_tag: ${{ steps.git_info.outputs.tag }} + run: | + repo="${GITHUB_REPOSITORY,,}" # to lower case + # if build triggered by tag, use tag name + tag="${git_tag:-latest}" + + # if tag is a version number prefixed by 'v', remove the 'v' + if [[ "$tag" =~ ^v[0-9].* ]]; then + tag="${tag:1}" + fi + + dock_image=$repo:$tag + echo $dock_image + echo "::set-output name=dock_image::$dock_image" + echo "::set-output name=repo::$repo" + + - uses: actions/checkout@v2 + - uses: docker/setup-qemu-action@v1 + - uses: docker/setup-buildx-action@v1 + with: + driver-opts: network=host + - name: Cache 
Docker layers + uses: actions/cache@v2 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx- + + - name: Login to DockerHub + id: dockerhub_login + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + + - name: Login to GitHub Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v2 + id: docker_build + with: + context: . + file: ./Dockerfile + tags: | + ${{ steps.determine.outputs.dock_image }} + localhost:5000/${{ steps.determine.outputs.dock_image }} + ghcr.io/${{ steps.determine.outputs.dock_image }} + platforms: linux/amd64,linux/arm64,linux/ppc64le + push: true + cache-from: type=local,src=/tmp/.buildx-cache + cache-to: type=local,dest=/tmp/.buildx-cache + + - name: Get plugin meta + id: pluginmeta + run: | + repo=${{ steps.determine.outputs.repo }} + dock_image=${{ steps.determine.outputs.dock_image }} + docker run --rm localhost:5000/$dock_image chris_plugin_info > /tmp/description.json + jq < /tmp/description.json # pretty print in log + echo "::set-output name=title::$(jq -r '.title' < /tmp/description.json)" + + - name: Update DockerHub description + uses: peter-evans/dockerhub-description@v2 + continue-on-error: true # it is not crucial that this works + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + short-description: ${{ steps.pluginmeta.outputs.title }} + readme-filepath: ./README.md + repository: ${{ steps.determine.outputs.repo }} + + - name: Upload to ChRIS Store + if: "!endsWith(steps.determine.outputs.dock_image, ':latest')" + uses: FNNDSC/chrisstore-action@master + with: + descriptor_file: /tmp/description.json + auth: ${{ secrets.CHRIS_STORE_USER }} diff --git a/Dockerfile 
b/Dockerfile index 3c76768..182dc67 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,10 @@ -# Python version can be changed, e.g. -# FROM python:3.8 -# FROM docker.io/fnndsc/conda:python3.10.2-cuda11.6.0 -FROM docker.io/python:3.10.2-slim-buster +FROM docker.io/python:3.10.2-alpine LABEL org.opencontainers.image.authors="FNNDSC " \ - org.opencontainers.image.title="ChRIS Plugin Title" \ - org.opencontainers.image.description="A ChRIS ds plugin that..." + org.opencontainers.image.title="dbg-bigfiles" \ + org.opencontainers.image.description="A ChRIS fs plugin that creates files of random data." -WORKDIR /usr/local/src/app +WORKDIR /usr/local/src/dbg-bigfiles COPY requirements.txt . RUN pip install -r requirements.txt @@ -15,4 +12,4 @@ RUN pip install -r requirements.txt COPY . . RUN pip install . -CMD ["commandname", "--help"] +CMD ["bigfiles", "--help"] diff --git a/README.md b/README.md index 68508f0..009cb26 100644 --- a/README.md +++ b/README.md @@ -1,82 +1,15 @@ -# _ChRIS_ ds Plugin Template +# `dbg-bigfiles` - +[![Version](https://img.shields.io/docker/v/fnndsc/dbg-bigfiles?sort=semver)](https://hub.docker.com/r/fnndsc/dbg-bigfiles) +[![MIT License](https://img.shields.io/github/license/fnndsc/dbg-bigfiles)](https://github.com/FNNDSC/dbg-bigfiles/blob/main/LICENSE) +[![Build](https://github.com/FNNDSC/dbg-bigfiles/actions/workflows/build.yml/badge.svg)](https://github.com/FNNDSC/dbg-bigfiles/actions) +`dbg-bigfiles` is a _ChRIS_ _fs_ plugin that creates files +of random data. It is for stress-testing _CUBE_'s capacity to +deal with large amounts of data. -This is a minimal template repository for _ChRIS_ _ds_ plugin applications. -For a more comprehensive boilerplate, use +## Example -https://github.com/fnndsc/cookiecutter-chrisapp - -## How to Use This Template - -1. Click "Use this template" -2. Clone the newly created repository -3. Replace placeholder text - -```shell -function replace () { - find . 
-type f -not -path '*/\.*/*' -not -path '*/\venv/*' -exec sed -i -e "s/$1/$2/g" '{}' \; -} - -replace commandname my_command_name -replace pl-appname pl-my-plugin-name -replace fnndsc my_username -``` - -### Template Examples - -Here are some good, complete examples of _ChRIS_ plugins created from this template. - -- https://github.com/FNNDSC/pl-nums2mask -- https://github.com/FNNDSC/pl-nii2mnc-u8 - -Advanced users can `cp -rv .github/workflows` into their own repositories to enable -automatic builds. - -## Abstract - -PROGRAMNAME is a [_ChRIS_](https://chrisproject.org/) -_ds_ plugin which takes in ... as input files and -creates ... as output files. - -## Usage - -```shell -singularity exec docker://fnndsc/pl-appname commandname [--args values...] input/ output/ -``` - -## Examples - -```shell -mkdir incoming/ outgoing/ -mv some.dat other.dat incoming/ -singularity exec docker://fnndsc/pl-appname:latest commandname [--args] incoming/ outgoing/ -``` - -## Development - -### Building - -```shell -docker build -t localhost/fnndsc/pl-appname . 
-``` - -### Get JSON Representation - -```shell -docker run --rm localhost/fnndsc/pl-appname chris_plugin_info > MyProgram.json -``` - -### Local Test Run - -```shell -docker run --rm -it --userns=host -u $(id -u):$(id -g) \ - -v $PWD/app.py:/usr/local/lib/python3.10/site-packages/app.py:ro \ - -v $PWD/in:/incoming:ro -v $PWD/out:/outgoing:rw -w /outgoing \ - localhost/fnndsc/pl-appname commandname /incoming /outgoing +```bash +singularity exec docker://fnndsc/dbg-bigfiles bigfiles --total '1.8GiB' --size '614.4MiB' output/ ``` diff --git a/app.py b/app.py deleted file mode 100755 index 25b25b0..0000000 --- a/app.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python - -from pathlib import Path -from argparse import ArgumentParser, Namespace -from chris_plugin import chris_plugin - -parser = ArgumentParser(description='cli description') -parser.add_argument('-e', '--example', default='jelly', - help='argument which does not do anything') - - -# documentation: https://fnndsc.github.io/chris_plugin/ -@chris_plugin( - parser=parser, - title='My ChRIS plugin', - category='', # ref. https://chrisstore.co/plugins - min_memory_limit='100Mi', # supported units: Mi, Gi - min_cpu_limit='1000m', # millicores, e.g. 
"1000m" = 1 CPU core
-    min_gpu_limit=0              # set min_gpu_limit=1 to enable GPU
-)
-def main(options: Namespace, inputdir: Path, outputdir: Path):
-    print(f'Option: {options.example}')
-
-
-if __name__ == '__main__':
-    main()
diff --git a/bigfiles.py b/bigfiles.py
new file mode 100755
index 0000000..5d0c6ca
--- /dev/null
+++ b/bigfiles.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+"""Command-line program which creates files of random data."""
+
+import sys
+import math
+import random
+from pathlib import Path
+from argparse import ArgumentParser, Namespace, ArgumentDefaultsHelpFormatter
+from typing import NoReturn
+from chris_plugin import chris_plugin
+import tqdm
+
+parser = ArgumentParser(description='Create files of random data',
+                        formatter_class=ArgumentDefaultsHelpFormatter)
+parser.add_argument('-s', '--seed', default=None,
+                    help='random number generator seed')
+parser.add_argument('-t', '--total', type=str, default='5GiB',
+                    help='minimum total size of all output (units: B, MiB, GiB)')
+parser.add_argument('-z', '--size', type=str, default='2MiB',
+                    help='size of individual files (units: B, MiB, GiB)')
+
+# Largest number of bytes generated and written per iteration (1 MiB).
+CHUNK_SIZE = 1048576
+
+# Bytes per unit, keyed by the lower-case unit names split_units() returns.
+UNIT_BYTES = {
+    'b': 1,
+    'mib': 1048576,
+    'gib': 1073741824,
+}
+
+
+@chris_plugin(
+    parser=parser,
+    title='Create files of random data',
+    category='Debug',
+)
+def main(options: Namespace, outputdir: Path):
+    """
+    Fill *outputdir* with ``file-<n>.dat`` files of random bytes.
+
+    Each file is ``--size`` bytes long; files are added until their combined
+    size reaches at least ``--total``.
+    """
+    try:
+        seed = int(options.seed)
+    except (TypeError, ValueError):
+        # No seed (None) or a non-numeric one: random.seed() takes either.
+        seed = options.seed
+
+    random.seed(seed)
+
+    final_total: int = units(options.total)
+    per_size: int = units(options.size)
+    if per_size <= 0:
+        # Empty files would never advance current_total: an endless loop.
+        die(f'file size must be greater than zero: "{options.size}"')
+    current_total = 0
+    n = 0
+
+    with tqdm.tqdm(total=final_total, unit_scale=True, unit='B') as pbar:
+        while current_total < final_total:
+            output_file = outputdir / f'file-{n}.dat'
+            with output_file.open('wb') as f:
+                file_size = 0
+                while file_size < per_size:
+                    # Work in bounded chunks rather than one huge buffer.
+                    next_chunk_size = min(per_size - file_size, CHUNK_SIZE)
+                    data = random.randbytes(next_chunk_size)
+                    f.write(data)
+                    file_size += next_chunk_size
+                    pbar.update(next_chunk_size)
+            pbar.write(str(output_file))
+            n += 1
+            current_total += file_size
+
+
+def units(u: str) -> int:
+    """
+    Convert a size such as ``'2MiB'`` or ``'1.5 GiB'`` to a number of bytes.
+
+    Fractional byte counts are rounded up. Exits if the unit is not one of
+    B, MiB or GiB.
+    """
+    num, un = split_units(u)
+    if un not in UNIT_BYTES:
+        die(f'unrecognized units "{un}" in: {u}')
+    return math.ceil(num * UNIT_BYTES[un])
+
+
+def split_units(u: str) -> tuple[float, str]:
+    """
+    Split a size string into ``(number, unit)``; the unit is lower-cased.
+
+    Number and unit may be separated by whitespace or written together. A
+    bare number is taken to be in bytes. Exits if the text cannot be parsed.
+    """
+    split = u.split()
+    if len(split) > 2:
+        die(f'cannot parse: "{u}"')
+    if len(split) == 2:
+        return try_parse(split[0], split[1], u)
+
+    # One token: the unit starts at the first character that is neither a
+    # digit nor a decimal point.
+    text = u.strip()
+    for i, c in enumerate(text):
+        if c.isdigit() or c == '.':
+            continue
+        return try_parse(text[:i], text[i:], u)
+
+    return try_parse(text, 'B', u)
+
+
+def try_parse(n: str, u: str, orig: str) -> tuple[float, str]:
+    """
+    Return ``(float(n), unit)`` where unit is *u* lower-cased, with a
+    trailing ``b`` added if absent. Exits, quoting *orig*, if *n* is not a
+    number.
+    """
+    unit = u.lower()
+    if not unit.endswith('b'):
+        unit += 'b'
+    try:
+        return float(n), unit
+    except ValueError:
+        # float() signals malformed text with ValueError, not TypeError.
+        die(f'cannot parse: "{orig}"')
+
+
+def die(msg: str) -> NoReturn:
+    """Print *msg* to stderr and exit with status 1."""
+    print(msg, file=sys.stderr)
+    sys.exit(1)
+
+
+# Import-time self-checks of the size parser.
+assert split_units('100 Mi') == (100, 'mib')
+assert split_units('100 MiB') == (100, 'mib')
+assert split_units('100MiB') == (100, 'mib')
+assert split_units('100.MiB') == (100.0, 'mib')
+assert split_units('100.1MiB') == (100.1, 'mib')
+assert split_units('100.1 MiB') == (100.1, 'mib')
+assert split_units('100mib') == (100, 'mib')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
index 233c7de..bc498a0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
 chris_plugin~=0.0.12
+tqdm~=4.63.0
diff --git a/setup.py b/setup.py
index f4da0fb..cbfe981 100644
--- a/setup.py
+++ b/setup.py
@@ -1,19 +1,19 @@
 from setuptools import setup
 
 setup(
-    name             = 'chris-plugin-template',
+    name             = 'bigfiles',
     version          = '1.0.0',
-    description      = 'A ChRIS DS plugin template',
+    description      = 'Create files of random data',
     author           = 'FNNDSC',
     author_email     = 'dev@babyMRI.org',
-    url              = 'https://github.com/FNNDSC/python-chrisapp-template',
-    py_modules       = ['app'],
-    install_requires = ['chris_plugin'],
+    url              = 'https://github.com/FNNDSC/dbg-bigfiles',
+    py_modules       = ['bigfiles'],
+    install_requires = ['chris_plugin', 'tqdm'],
     license          = 'MIT',
-    python_requires  = '>=3.8.2',
+    python_requires  = '>=3.10.2',
     entry_points     = {
         'console_scripts': [
-
'commandname = app:main' + 'bigfiles = bigfiles:main' ] }, classifiers = [