Skip to content

Commit

Permalink
fix: switch to wget and write some logging when downloading genome an…
Browse files Browse the repository at this point in the history
…notations
  • Loading branch information
ftabaro committed Mar 12, 2024
1 parent bc99687 commit c49e4ff
Showing 1 changed file with 22 additions and 18 deletions.
40 changes: 22 additions & 18 deletions workflow/scripts/download-gtf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,35 @@ set -e

URL="${snakemake_params[url]}"

mkdir -p $(dirname ${snakemake_log})
touch ${snakemake_log}

if [[ $URL == *.gz ]]; then
TMP=$(mktemp -u --suffix .gz)
echo "$URL refers to gzipped file. Temp file: $TMP" >> ${snakemake_log}
else
TMP=$(mktemp -u)
echo "$URL does not refer to gzipped file. Temp file: $TMP" >> ${snakemake_log}
fi

echo "Downloading to $TMP" | tee -a ${snakemake_log}
echo "Downloading to $TMP" >> ${snakemake_log}

OUTPUT=${snakemake_output}
mkdir -pv $(dirname $OUTPUT)

# wget -O $TMP "$URL"
curl "$URL" \
-H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0' \
-H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' \
-H 'Accept-Language: en-US,en;q=0.5' \
-H 'Accept-Encoding: gzip, deflate' \
-H 'Connection: keep-alive' \
-H 'Upgrade-Insecure-Requests: 1' \
-H 'DNT: 1' \
-H 'Sec-GPC: 1' \
-H 'Pragma: no-cache' \
-H 'Cache-Control: no-cache' \
--silent \
--output $TMP
wget "$URL" \
--user-agent=' Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0' \
--header='Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' \
--header='Accept-Language: en-US,en;q=0.5' \
--header='Accept-Encoding: gzip, deflate' \
--header='Connection: keep-alive' \
--header='Upgrade-Insecure-Requests: 1' \
--header='DNT: 1' \
--header='Sec-GPC: 1' \
--header='Pragma: no-cache' \
--header='Cache-Control: no-cache' \
--quiet \
--output-document="$TMP"

sleep $(( $RANDOM % 10 + 2 ))

Expand All @@ -37,10 +41,10 @@ if [[ $URL == *.gz ]] && [[ ! $OUTPUT == *.gz ]]; then
sleep $(( $RANDOM % 10 + 2 ))
fi

if grep -v '#' "${TMP%.gz}" | head -n 1 | grep -q '^chr' | tee -a ${snakemake_log}; then
echo "Mv'ing to $OUTPUT" | tee -a ${snakemake_log}
if grep -v '#' "${TMP%.gz}" | head -n 1 | grep -q '^chr' >> ${snakemake_log}; then
echo "Mv'ing to $OUTPUT" >> ${snakemake_log}
mv $TMP $OUTPUT
else
echo "Adding \"chr\" to first column, then move to $OUTPUT" | tee -a ${snakemake_log}
echo "Adding \"chr\" to first column, then move to $OUTPUT" >> ${snakemake_log}
awk -F "\t" -v OFS="\t" '!/^#/{print "chr"$0}/#/{print}' ${TMP%.gz} > $OUTPUT
fi

0 comments on commit c49e4ff

Please sign in to comment.