#!/usr/bin/env bash
#
# scripts/get-dataset.sh (file mode 100755)
#
# Provenance (from the patch that introduced this script):
#   Commit:  582d955a131c4f0b56fc253ea761a9799e41b952
#   Author:  James Hadfield
#   Date:    Thu, 20 Apr 2023 16:20:39 +1200
#   Subject: Add helper script for downloading dataset+sidecars
#
#   Adds a small dev-use-only script to download the dataset + sidecar files
#   from a given nextstrain.org URL and give them the correct filename so
#   that you can test the same URL pathname from localhost auspice.
#   I've wanted this for ages but #1603 finally motivated me to do it.
#
# A simple, non-battle-hardened script to help download a dataset and its
# corresponding sidecars for testing locally with auspice.
#
# Usage:
#   ./scripts/get-dataset.sh <nextstrain.org URL> <existing directory>
#
# Examples:
#   ./scripts/get-dataset.sh nextstrain.org/zika data
#   ./scripts/get-dataset.sh nextstrain.org/community/joverlee521/nextstrain-testing/flu/seasonal/h1n1pdm/ha/09-17 datasets
#
# Files written (slashes in the dataset path become underscores):
#   <directory>/<dataset>.json
#   <directory>/<dataset>_tip-frequencies.json   (if available)
#   <directory>/<dataset>_root-sequence.json     (if available)
#   <directory>/<dataset>_measurements.json      (if available)
#
# Exit status:
#   0  main dataset JSON downloaded (sidecars are best-effort)
#   2  bad usage, or the destination directory does not exist
#   3  main dataset JSON could not be downloaded

set -euo pipefail

readonly CHARON_ENDPOINT="https://nextstrain.org/charon/getDataset"
readonly -a SIDECARS=(tip-frequencies root-sequence measurements)

#######################################
# Print a one-line usage summary to stderr.
#######################################
usage() {
  printf 'Usage: %s <nextstrain.org URL> <existing directory>\n' "${0##*/}" >&2
}

#######################################
# Download a URL and keep the result only if the server answered HTTP 200.
# The body is first written to "<target>.part" and renamed on success, so a
# failed or non-200 response never leaves a bogus file at <target>.
# Arguments:
#   $1 - URL to fetch
#   $2 - path the response body should be saved to
# Returns:
#   0 if the file was saved, 1 otherwise
#######################################
fetch_json() {
  local url=$1
  local target=$2
  local part="${target}.part"
  local http_status

  # --write-out prints the status code on stdout (captured here); curl's
  # progress meter goes to stderr and so remains visible to the user.
  if http_status=$(curl --compressed --output "${part}" \
      --write-out '%{http_code}' "${url}") &&
    [[ "${http_status}" == "200" ]] &&
    mv -- "${part}" "${target}"; then
    return 0
  fi

  rm -f -- "${part}"
  return 1
}

#######################################
# Entry point: validate arguments, then fetch the main dataset JSON (fatal
# on failure) followed by each sidecar (failures are reported and skipped).
# Arguments:
#   $1 - nextstrain.org URL (scheme optional) or bare dataset path
#   $2 - existing directory to download into
#######################################
main() {
  if (( $# < 2 )); then
    usage
    exit 2
  fi

  echo "Getting dataset + sidecars associated with $1 and downloading them to $2"

  if [[ ! -d "$2" ]]; then
    echo "Directory ${2} doesn't exist!" >&2
    exit 2
  fi

  # Reduce the URL to the dataset path: drop everything up to and including
  # "nextstrain.org/", then any query string / fragment and trailing slashes
  # so they do not leak into the request prefix or the filename.
  local dataset=${1#*nextstrain.org/}
  dataset=${dataset%%\?*}
  dataset=${dataset%%\#*}
  while [[ "${dataset}" == */ ]]; do
    dataset=${dataset%/}
  done

  if [[ -z "${dataset}" ]]; then
    echo "Could not work out a dataset path from '$1'" >&2
    usage
    exit 2
  fi

  local dest="${2%/}/${dataset//\//_}.json"
  local main_url="${CHARON_ENDPOINT}?prefix=${dataset}"

  if fetch_json "${main_url}" "${dest}"; then
    echo "Downloaded main JSON to ${dest}"
  else
    echo "Cannot download main JSON. Fatal!" >&2
    exit 3
  fi

  local sidecar sidecar_dest
  for sidecar in "${SIDECARS[@]}"; do
    sidecar_dest="${dest%.json}_${sidecar}.json"
    if fetch_json "${main_url}&type=${sidecar}" "${sidecar_dest}"; then
      echo "Downloaded ${sidecar} JSON to ${sidecar_dest}"
    else
      # Not every dataset has every sidecar, so this is not an error.
      echo "Cannot download ${sidecar} JSON. Continuing..."
    fi
  done
}

main "$@"