@@ -30,6 +30,7 @@ select process_option in "${options[@]}"; do [[ "$process_option" ]] && break; d
30
30
# Leave the script as soon as the user selects the "Exit" menu entry.
# The operands are quoted without padding: with extra spaces inside the
# quotes the two strings can never be equal and the guard never fires.
if [[ "$process_option" == "Exit" ]]; then
  echo " Exiting."
  exit 0
fi
31
31
32
32
# UniProt archives looked for by the script, given relative to $mdir.
# No padding inside the quotes: a leading space would become part of the
# file name and make every existence check fail.
uniprot_files=("uniprotkb/uniprot_sprot.fasta.gz" "uniprotkb/uniprot_trembl.fasta.gz")

# Database file whose presence is tested before processing PDBs.
# NOTE(review): presumably produced by ./bin/fasta_to_sqlite — confirm.
dbfile="$mdir/uniprotkb/uniprot_sequences.db"
33
34
34
35
if [[ " $process_option " == " Process PDBs matching UniProt" ]]; then
35
36
if [[ ! -f " $dbfile " ]]; then
@@ -42,10 +43,7 @@ if [[ "$process_option" == "Process PDBs matching UniProt" ]]; then
42
43
fi
43
44
done
44
45
if (( found_files == 0 )) ; then
45
- echo " Uniprot files are missing. These are necessary to match PDBs with UniProt."
46
- echo " You can download the required files from https://www.uniprot.org/downloads and place them in the 'uniprotkb' directory."
47
- echo " The required files are uniprot_sprot.fasta.gz (SwissProt only), which expands from a 92 MB .gz file to a 250 MB database,"
48
- echo " or both uniprot_sprot.fasta.gz and uniprot_trembl.fasta.gz (SwissProt+TrEMBL), which expand from 62 GB .gz files to a ~191 GB database."
46
+ echo " No Uniprot files found. Download from https://www.uniprot.org/downloads and place in 'uniprotkb' directory. Required: uniprot_sprot.fasta.gz (350MB uncompressed); Optional: uniprot_sprot.fasta.gz + uniprot_trembl.fasta.gz (250GB uncompressed)."
49
47
exit 1
50
48
else
51
49
echo " Creating database."
@@ -54,8 +52,8 @@ if [[ "$process_option" == "Process PDBs matching UniProt" ]]; then
54
52
# Only process the files found
55
53
# Feed every listed archive that is present on disk into the converter.
# Entries of uniprot_files are paths relative to $mdir; missing ones are skipped.
for filepath in "${uniprot_files[@]}"; do
  # Expansions are quoted without padding so each entry stays one exact path
  # and an empty list produces no iteration at all.
  if [[ -f "$mdir/$filepath" ]]; then
    echo " Processing $filepath ..."
    # Stream the decompressed archive to the converter's stdin.
    # NOTE(review): presumably fasta_to_sqlite writes to $dbfile — confirm.
    gunzip -c "$mdir/$filepath" | ./bin/fasta_to_sqlite
  fi
done
61
59
0 commit comments