Skip to content

Commit

Permalink
Merge pull request #770 from samblau/qchem
Browse files Browse the repository at this point in the history
Q-Chem 6 + Misc Updates
  • Loading branch information
Zhuoying authored Mar 14, 2023
2 parents 17e7a7f + 30c1028 commit a041423
Show file tree
Hide file tree
Showing 60 changed files with 183,676 additions and 64 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
export PATH=$HOME/miniconda3/bin:$PATH
conda create --name test_env python=3.8
source activate test_env
conda install -c conda-forge openbabel pymatgen
conda install -c conda-forge openbabel gcc_linux-64
pip install -r requirements-ci.txt
pip install .
no_output_timeout: 1h
Expand Down
17 changes: 9 additions & 8 deletions atomate/qchem/drones.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,9 @@ def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
if d["output"]["job_type"] in ["freq", "frequency"]:
d["output"]["frequencies"] = d_calc_final["frequencies"]
# Note: for single-atom freq calcs, this key may not exist
d["output"]["frequency_modes"] = d_calc_final.get("frequency_mode_vectors", [])
d["output"]["frequency_modes"] = d_calc_final.get(
"frequency_mode_vectors", []
)
d["output"]["enthalpy"] = d_calc_final["total_enthalpy"]
d["output"]["entropy"] = d_calc_final["total_entropy"]
if d["input"]["job_type"] in ["opt", "optimization", "ts"]:
Expand Down Expand Up @@ -275,12 +277,11 @@ def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
if d_calc_final["CDS_gradients"] is not None:
d["output"]["CDS_gradients"] = d_calc_final["CDS_gradients"][0]

if d["output"]["job_type"] == "force":
d["output"]["gradients"] = d_calc_final["gradients"][0]
if d_calc_final["pcm_gradients"] is not None:
d["output"]["pcm_gradients"] = d_calc_final["pcm_gradients"][0]
if d_calc_final["CDS_gradients"] is not None:
d["output"]["CDS_gradients"] = d_calc_final["CDS_gradients"][0]
if d["output"]["job_type"] in ["force", "sp"]:
d["output"]["dipoles"] = d_calc_final["dipoles"]
if "gap_info" in d_calc_final:
if d_calc_final["gap_info"] is not None:
d["output"]["gap_info"] = d_calc_final["gap_info"]

opt_trajectory = []
calcs = copy.deepcopy(d["calcs_reversed"])
Expand Down Expand Up @@ -451,7 +452,7 @@ def process_qchem_multirun(dir_name, input_files, output_files):
qchem_input_file = os.path.join(dir_name, input_files.get(key))
qchem_output_file = os.path.join(dir_name, output_files.get(key))
multi_out = QCOutput.multiple_outputs_from_file(
QCOutput, qchem_output_file, keep_sub_files=False
filename=qchem_output_file, keep_sub_files=False
)
multi_in = QCInput.from_multi_jobs_file(qchem_input_file)
for ii, out in enumerate(multi_out):
Expand Down
136 changes: 134 additions & 2 deletions atomate/qchem/firetasks/parse_outputs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import json
import os
import shutil
import struct
from monty.io import zopen

from fireworks import FiretaskBase, FWAction, explicit_serialize
from fireworks.utilities.fw_serializers import DATETIME_HANDLER
Expand Down Expand Up @@ -50,6 +53,8 @@ class QChemToDb(FiretaskBase):
"calc_loc",
"input_file",
"output_file",
"parse_grad_file",
"parse_hess_file",
"additional_fields",
"db_file",
"fw_spec_field",
Expand Down Expand Up @@ -84,13 +89,123 @@ def run_task(self, fw_spec):
multirun=multirun,
)

logger.info("Drone finished!")

# parse the GRAD file, if desired and if it is present
if self.get("parse_grad_file", False):
grad_file = None
if os.path.exists(os.path.join(calc_dir, "GRAD.gz")):
grad_file = os.path.join(calc_dir, "GRAD.gz")
elif os.path.exists(os.path.join(calc_dir, "GRAD")):
grad_file = os.path.join(calc_dir, "GRAD")
elif os.path.exists(os.path.join(calc_dir, "scratch/GRAD.gz")):
grad_file = os.path.join(calc_dir, "scratch/GRAD.gz")
elif os.path.exists(os.path.join(calc_dir, "scratch/GRAD")):
grad_file = os.path.join(calc_dir, "scratch/GRAD")
elif os.path.exists(os.path.join(calc_dir, "131.0.gz")):
grad_file = os.path.join(calc_dir, "131.0.gz")
elif os.path.exists(os.path.join(calc_dir, "131.0")):
grad_file = os.path.join(calc_dir, "131.0")
elif os.path.exists(os.path.join(calc_dir, "scratch/131.0.gz")):
grad_file = os.path.join(calc_dir, "scratch/131.0.gz")
elif os.path.exists(os.path.join(calc_dir, "scratch/131.0")):
grad_file = os.path.join(calc_dir, "scratch/131.0")

if grad_file is None:
task_doc["warnings"]["grad_file_missing"] = True
else:
logger.info("Parsing gradient file")
grad = []
if grad_file[-5:] == "131.0" or grad_file[-8:] == "131.0.gz":
tmp_grad_data = []
logger.info("Gradient file is binary")
with zopen(grad_file, mode="rb") as file:
binary = file.read()
for ii in range(int(len(binary) / 8)):
tmp_grad_data.append(
struct.unpack("d", binary[ii * 8 : (ii + 1) * 8])[0]
)
grad = []
for ii in range(int(len(tmp_grad_data) / 3)):
grad.append(
[
float(tmp_grad_data[ii * 3]),
float(tmp_grad_data[ii * 3 + 1]),
float(tmp_grad_data[ii * 3 + 2]),
]
)
else:
logger.info("Gradient file is text")
with zopen(grad_file, mode="rt", encoding="ISO-8859-1") as f:
lines = f.readlines()
for line in lines:
split_line = line.split()
if len(split_line) == 3:
grad.append(
[
float(split_line[0]),
float(split_line[1]),
float(split_line[2]),
]
)
task_doc["output"]["precise_gradients"] = grad
logger.info("Done parsing gradient. Now removing scratch directory")
if os.path.exists(os.path.join(calc_dir, "scratch")):
shutil.rmtree(os.path.join(calc_dir, "scratch"))

# parse the HESS file(s), if desired and if one or multiple are present
if self.get("parse_hess_file", False):
hess_files = []
for filename in os.listdir(calc_dir):
if "HESS" in filename:
hess_files.append(filename)
elif filename == "scratch":
for subfilename in os.listdir(os.path.join(calc_dir, "scratch")):
if "HESS" in subfilename:
hess_files.append("scratch/" + subfilename)
elif subfilename == "132.0" or subfilename == "132.0.gz":
hess_files.append("scratch/" + subfilename)

if len(hess_files) == 0:
task_doc["warnings"]["hess_file_missing"] = True
else:
logger.info("Parsing Hessian file")
hess_data = {}
for hess_name in hess_files:
if hess_name[-5:] == "132.0" or hess_name[-8:] == "132.0.gz":
logger.info("Hessian file is binary")
tmp_hess_data = []
with zopen(
os.path.join(calc_dir, hess_name), mode="rb"
) as file:
binary = file.read()
for ii in range(int(len(binary) / 8)):
tmp_hess_data.append(
struct.unpack("d", binary[ii * 8 : (ii + 1) * 8])[0]
)
hess_data[hess_name] = tmp_hess_data
else:
logger.info("Hessian file is text")
with zopen(
os.path.join(calc_dir, hess_name),
mode="rt",
encoding="ISO-8859-1",
) as f:
hess_data[hess_name] = f.readlines()
task_doc["output"]["hess_data"] = hess_data
logger.info("Done parsing Hessian. Now removing scratch directory")
if os.path.exists(os.path.join(calc_dir, "scratch")):
shutil.rmtree(os.path.join(calc_dir, "scratch"))

# Check for additional keys to set based on the fw_spec
logger.info("Updating fw_spec_field")
if self.get("fw_spec_field"):
task_doc.update(
{self.get("fw_spec_field"): fw_spec.get(self.get("fw_spec_field"))}
)

# Update fw_spec with final/optimized structure
logger.info("Updating charges in spec")
update_spec = {}
if task_doc.get("output").get("optimized_molecule"):
update_spec["prev_calc_molecule"] = task_doc["output"]["optimized_molecule"]
Expand All @@ -101,14 +216,17 @@ def run_task(self, fw_spec):
update_spec["prev_calc_esp"] = task_doc["output"]["ESP"]

# get the database connection
logger.info("Get database connection")
db_file = env_chk(self.get("db_file"), fw_spec)

# db insertion or taskdoc dump
if not db_file:
with open(os.path.join(calc_dir, "task.json"), "w") as f:
f.write(json.dumps(task_doc, default=DATETIME_HANDLER))
else:
logger.info("Connecting to QChemCalcDb")
mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
logger.info("Starting task_doc insert")
t_id = mmdb.insert(task_doc)
logger.info(f"Finished parsing with task_id: {t_id}")

Expand Down Expand Up @@ -195,12 +313,26 @@ def run_task(self, fw_spec):
task_doc_clean["orig"]["molecule"]["spin_multiplicity"] = 1
task_doc_clean["output"]["initial_molecule"]["charge"] = 1
task_doc_clean["output"]["initial_molecule"]["spin_multiplicity"] = 1
task_doc_clean["output"]["initial_molecule"]["sites"] = [{'name': 'H', 'species': [{'element': 'H', 'occu': 1}], 'xyz': [0.0, 0.0, 0.0], 'properties': {}}]
task_doc_clean["output"]["initial_molecule"]["sites"] = [
{
"name": "H",
"species": [{"element": "H", "occu": 1}],
"xyz": [0.0, 0.0, 0.0],
"properties": {},
}
]
task_doc_clean["output"]["mulliken"] = [+1.000000]
task_doc_clean["output"]["resp"] = [+1.000000]
task_doc_clean["output"]["optimized_molecule"]["charge"] = 1
task_doc_clean["output"]["optimized_molecule"]["spin_multiplicity"] = 1
task_doc_clean["output"]["optimized_molecule"]["sites"] = [{'name': 'H', 'species': [{'element': 'H', 'occu': 1}], 'xyz': [0.0, 0.0, 0.0], 'properties': {}}]
task_doc_clean["output"]["optimized_molecule"]["sites"] = [
{
"name": "H",
"species": [{"element": "H", "occu": 1}],
"xyz": [0.0, 0.0, 0.0],
"properties": {},
}
]
task_doc_clean["output"]["final_energy"] = (
task_doc_2["output"]["final_energy"] - task_doc_1["output"]["final_energy"]
)
Expand Down
85 changes: 85 additions & 0 deletions atomate/qchem/firetasks/tests/test_parse_outputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import os
import shutil
import unittest

from pymatgen.core import Molecule
from pymatgen.io.qchem.inputs import QCInput

from atomate.qchem.firetasks.parse_outputs import QChemToDb
from atomate.utils.testing import AtomateTest

from monty.serialization import loadfn

__author__ = "Samuel Blau"
__email__ = "samblau1@gmail.com"

module_dir = os.path.dirname(os.path.abspath(__file__))


class TestParseOutputQChem_grads(AtomateTest):
def setUp(self, lpad=False):
super().setUp(lpad=False)

def tearDown(self):
pass

def test_parse_grad_good(self):
my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","parse_grad_good")
ft = QChemToDb(calc_dir=my_calc_dir, parse_grad_file=True)
ft.run_task({})
task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
self.assertEqual(task_doc["output"]["final_energy"], -274.6893362188)
self.assertEqual(len(task_doc["output"]["precise_gradients"]), 10)
self.assertEqual(task_doc["output"]["precise_gradients"][0], [0.0090906486788787, 0.016150932052898, 0.0054568671405536])
self.assertEqual(task_doc["output"]["precise_gradients"][-1], [0.0014495621906601, -0.0018570062958895, 0.0012478282193499])
os.remove(os.path.join(my_calc_dir, "task.json"))

def test_parse_grad_131(self):
my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","tmqm_grad_pcm")
ft = QChemToDb(calc_dir=my_calc_dir, parse_grad_file=True)
ft.run_task({})
task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
self.assertEqual(task_doc["output"]["final_energy"], -2791.8404057999)
self.assertEqual(len(task_doc["output"]["precise_gradients"]), 25)
self.assertEqual(task_doc["output"]["precise_gradients"][0], [-2.7425178677332305e-05, 1.8017443772144412e-06, -2.3689773769176685e-06])
self.assertEqual(task_doc["output"]["precise_gradients"][-1], [0.0028753270363098644, -0.000392640066359285, 0.004405091870637312])
os.remove(os.path.join(my_calc_dir, "task.json"))

def test_parse_grad_bad(self):
my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","parse_grad_bad")
ft = QChemToDb(calc_dir=my_calc_dir, parse_grad_file=True)
ft.run_task({})
task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
self.assertEqual(task_doc["warnings"]["grad_file_missing"], True)
os.remove(os.path.join(my_calc_dir, "task.json"))


class TestParseOutputQChem_hess(AtomateTest):
def setUp(self, lpad=False):
os.makedirs(os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "scratch"))
shutil.copyfile(
os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "BUP_scratch", "132.0"),
os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "scratch", "132.0"),
)
shutil.copyfile(
os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "BUP_scratch", "HESS"),
os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "scratch", "HESS"),
)
super().setUp(lpad=False)

def test_parse_hess(self):
my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","freq_save_hess")
ft = QChemToDb(calc_dir=my_calc_dir, parse_hess_file=True)
ft.run_task({})
task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
self.assertEqual(task_doc["output"]["final_energy"], -151.3244603665)
self.assertEqual(task_doc["output"]["hess_data"]["scratch/132.0"][0],0.12636293260949633)
self.assertEqual(task_doc["output"]["hess_data"]["scratch/132.0"][-2],-0.2025032138024329)
self.assertEqual(task_doc["output"]["hess_data"]["scratch/HESS"][-2], ' -0.175476533300377 -0.202503213802433 0.205623571433770 \n')
os.remove(os.path.join(my_calc_dir, "task.json"))

def tearDown(self):
pass

if __name__ == "__main__":
unittest.main()
12 changes: 8 additions & 4 deletions atomate/qchem/firetasks/tests/test_run_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,10 +543,14 @@ def test_RunQChemFake(self):
).data
this_out = QCOutput("mol.qout").data
for key in ref_out:
try:
self.assertEqual(ref_out[key], this_out[key])
except ValueError:
np.testing.assert_array_equal(ref_out[key], this_out[key])
if key == "dipoles":
self.assertEqual(ref_out[key]["total"], this_out[key]["total"])
np.testing.assert_array_equal(ref_out[key]["dipole"], this_out[key]["dipole"])
else:
try:
self.assertEqual(ref_out[key], this_out[key])
except ValueError:
np.testing.assert_array_equal(ref_out[key], this_out[key])
for filename in os.listdir(
os.path.join(module_dir, "..", "..", "test_files", "real_run")
):
Expand Down
Loading

0 comments on commit a041423

Please # to comment.