Merge pull request #770 from samblau/qchem

Q-Chem 6 + Misc Updates
hackingmaterials · Mar 14, 2023 · a041423 · a041423
2 parents 17e7a7f + 30c1028
commit a041423
Show file tree

Hide file tree

Showing 60 changed files with 183,676 additions and 64 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -15,7 +15,7 @@ jobs:
             export PATH=$HOME/miniconda3/bin:$PATH
             conda create --name test_env python=3.8
             source activate test_env
-            conda install -c conda-forge openbabel pymatgen
+            conda install -c conda-forge openbabel gcc_linux-64
             pip install -r requirements-ci.txt
             pip install .
           no_output_timeout: 1h

diff --git a/atomate/qchem/drones.py b/atomate/qchem/drones.py
@@ -233,7 +233,9 @@ def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
             if d["output"]["job_type"] in ["freq", "frequency"]:
                 d["output"]["frequencies"] = d_calc_final["frequencies"]
                 # Note: for single-atom freq calcs, this key may not exist
-                d["output"]["frequency_modes"] = d_calc_final.get("frequency_mode_vectors", [])
+                d["output"]["frequency_modes"] = d_calc_final.get(
+                    "frequency_mode_vectors", []
+                )
                 d["output"]["enthalpy"] = d_calc_final["total_enthalpy"]
                 d["output"]["entropy"] = d_calc_final["total_entropy"]
                 if d["input"]["job_type"] in ["opt", "optimization", "ts"]:
@@ -275,12 +277,11 @@ def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
                 if d_calc_final["CDS_gradients"] is not None:
                     d["output"]["CDS_gradients"] = d_calc_final["CDS_gradients"][0]
 
-            if d["output"]["job_type"] == "force":
-                d["output"]["gradients"] = d_calc_final["gradients"][0]
-                if d_calc_final["pcm_gradients"] is not None:
-                    d["output"]["pcm_gradients"] = d_calc_final["pcm_gradients"][0]
-                if d_calc_final["CDS_gradients"] is not None:
-                    d["output"]["CDS_gradients"] = d_calc_final["CDS_gradients"][0]
+            if d["output"]["job_type"] in ["force", "sp"]:
+                d["output"]["dipoles"] = d_calc_final["dipoles"]
+                if "gap_info" in d_calc_final:
+                    if d_calc_final["gap_info"] is not None:
+                        d["output"]["gap_info"] = d_calc_final["gap_info"]
 
             opt_trajectory = []
             calcs = copy.deepcopy(d["calcs_reversed"])
@@ -451,7 +452,7 @@ def process_qchem_multirun(dir_name, input_files, output_files):
                 qchem_input_file = os.path.join(dir_name, input_files.get(key))
                 qchem_output_file = os.path.join(dir_name, output_files.get(key))
                 multi_out = QCOutput.multiple_outputs_from_file(
-                    QCOutput, qchem_output_file, keep_sub_files=False
+                    filename=qchem_output_file, keep_sub_files=False
                 )
                 multi_in = QCInput.from_multi_jobs_file(qchem_input_file)
                 for ii, out in enumerate(multi_out):

diff --git a/atomate/qchem/firetasks/parse_outputs.py b/atomate/qchem/firetasks/parse_outputs.py
@@ -1,5 +1,8 @@
 import json
 import os
+import shutil
+import struct
+from monty.io import zopen
 
 from fireworks import FiretaskBase, FWAction, explicit_serialize
 from fireworks.utilities.fw_serializers import DATETIME_HANDLER
@@ -50,6 +53,8 @@ class QChemToDb(FiretaskBase):
         "calc_loc",
         "input_file",
         "output_file",
+        "parse_grad_file",
+        "parse_hess_file",
         "additional_fields",
         "db_file",
         "fw_spec_field",
@@ -84,13 +89,123 @@ def run_task(self, fw_spec):
             multirun=multirun,
         )
 
+        logger.info("Drone finished!")
+
+        # parse the GRAD file, if desired and if it is present
+        if self.get("parse_grad_file", False):
+            grad_file = None
+            if os.path.exists(os.path.join(calc_dir, "GRAD.gz")):
+                grad_file = os.path.join(calc_dir, "GRAD.gz")
+            elif os.path.exists(os.path.join(calc_dir, "GRAD")):
+                grad_file = os.path.join(calc_dir, "GRAD")
+            elif os.path.exists(os.path.join(calc_dir, "scratch/GRAD.gz")):
+                grad_file = os.path.join(calc_dir, "scratch/GRAD.gz")
+            elif os.path.exists(os.path.join(calc_dir, "scratch/GRAD")):
+                grad_file = os.path.join(calc_dir, "scratch/GRAD")
+            elif os.path.exists(os.path.join(calc_dir, "131.0.gz")):
+                grad_file = os.path.join(calc_dir, "131.0.gz")
+            elif os.path.exists(os.path.join(calc_dir, "131.0")):
+                grad_file = os.path.join(calc_dir, "131.0")
+            elif os.path.exists(os.path.join(calc_dir, "scratch/131.0.gz")):
+                grad_file = os.path.join(calc_dir, "scratch/131.0.gz")
+            elif os.path.exists(os.path.join(calc_dir, "scratch/131.0")):
+                grad_file = os.path.join(calc_dir, "scratch/131.0")
+
+            if grad_file is None:
+                task_doc["warnings"]["grad_file_missing"] = True
+            else:
+                logger.info("Parsing gradient file")
+                grad = []
+                if grad_file[-5:] == "131.0" or grad_file[-8:] == "131.0.gz":
+                    tmp_grad_data = []
+                    logger.info("Gradient file is binary")
+                    with zopen(grad_file, mode="rb") as file:
+                        binary = file.read()
+                        for ii in range(int(len(binary) / 8)):
+                            tmp_grad_data.append(
+                                struct.unpack("d", binary[ii * 8 : (ii + 1) * 8])[0]
+                            )
+                    grad = []
+                    for ii in range(int(len(tmp_grad_data) / 3)):
+                        grad.append(
+                            [
+                                float(tmp_grad_data[ii * 3]),
+                                float(tmp_grad_data[ii * 3 + 1]),
+                                float(tmp_grad_data[ii * 3 + 2]),
+                            ]
+                        )
+                else:
+                    logger.info("Gradient file is text")
+                    with zopen(grad_file, mode="rt", encoding="ISO-8859-1") as f:
+                        lines = f.readlines()
+                        for line in lines:
+                            split_line = line.split()
+                            if len(split_line) == 3:
+                                grad.append(
+                                    [
+                                        float(split_line[0]),
+                                        float(split_line[1]),
+                                        float(split_line[2]),
+                                    ]
+                                )
+                task_doc["output"]["precise_gradients"] = grad
+                logger.info("Done parsing gradient. Now removing scratch directory")
+                if os.path.exists(os.path.join(calc_dir, "scratch")):
+                    shutil.rmtree(os.path.join(calc_dir, "scratch"))
+
+        # parse the HESS file(s), if desired and if one or multiple are present
+        if self.get("parse_hess_file", False):
+            hess_files = []
+            for filename in os.listdir(calc_dir):
+                if "HESS" in filename:
+                    hess_files.append(filename)
+                elif filename == "scratch":
+                    for subfilename in os.listdir(os.path.join(calc_dir, "scratch")):
+                        if "HESS" in subfilename:
+                            hess_files.append("scratch/" + subfilename)
+                        elif subfilename == "132.0" or subfilename == "132.0.gz":
+                            hess_files.append("scratch/" + subfilename)
+
+            if len(hess_files) == 0:
+                task_doc["warnings"]["hess_file_missing"] = True
+            else:
+                logger.info("Parsing Hessian file")
+                hess_data = {}
+                for hess_name in hess_files:
+                    if hess_name[-5:] == "132.0" or hess_name[-8:] == "132.0.gz":
+                        logger.info("Hessian file is binary")
+                        tmp_hess_data = []
+                        with zopen(
+                            os.path.join(calc_dir, hess_name), mode="rb"
+                        ) as file:
+                            binary = file.read()
+                            for ii in range(int(len(binary) / 8)):
+                                tmp_hess_data.append(
+                                    struct.unpack("d", binary[ii * 8 : (ii + 1) * 8])[0]
+                                )
+                        hess_data[hess_name] = tmp_hess_data
+                    else:
+                        logger.info("Hessian file is text")
+                        with zopen(
+                            os.path.join(calc_dir, hess_name),
+                            mode="rt",
+                            encoding="ISO-8859-1",
+                        ) as f:
+                            hess_data[hess_name] = f.readlines()
+                task_doc["output"]["hess_data"] = hess_data
+                logger.info("Done parsing Hessian. Now removing scratch directory")
+                if os.path.exists(os.path.join(calc_dir, "scratch")):
+                    shutil.rmtree(os.path.join(calc_dir, "scratch"))
+
         # Check for additional keys to set based on the fw_spec
+        logger.info("Updating fw_spec_field")
         if self.get("fw_spec_field"):
             task_doc.update(
                 {self.get("fw_spec_field"): fw_spec.get(self.get("fw_spec_field"))}
             )
 
         # Update fw_spec with final/optimized structure
+        logger.info("Updating charges in spec")
         update_spec = {}
         if task_doc.get("output").get("optimized_molecule"):
             update_spec["prev_calc_molecule"] = task_doc["output"]["optimized_molecule"]
@@ -101,14 +216,17 @@ def run_task(self, fw_spec):
                 update_spec["prev_calc_esp"] = task_doc["output"]["ESP"]
 
         # get the database connection
+        logger.info("Get database connection")
         db_file = env_chk(self.get("db_file"), fw_spec)
 
         # db insertion or taskdoc dump
         if not db_file:
             with open(os.path.join(calc_dir, "task.json"), "w") as f:
                 f.write(json.dumps(task_doc, default=DATETIME_HANDLER))
         else:
+            logger.info("Connecting to QChemCalcDb")
             mmdb = QChemCalcDb.from_db_file(db_file, admin=True)
+            logger.info("Starting task_doc insert")
             t_id = mmdb.insert(task_doc)
             logger.info(f"Finished parsing with task_id: {t_id}")
 
@@ -195,12 +313,26 @@ def run_task(self, fw_spec):
         task_doc_clean["orig"]["molecule"]["spin_multiplicity"] = 1
         task_doc_clean["output"]["initial_molecule"]["charge"] = 1
         task_doc_clean["output"]["initial_molecule"]["spin_multiplicity"] = 1
-        task_doc_clean["output"]["initial_molecule"]["sites"] = [{'name': 'H', 'species': [{'element': 'H', 'occu': 1}], 'xyz': [0.0, 0.0, 0.0], 'properties': {}}]
+        task_doc_clean["output"]["initial_molecule"]["sites"] = [
+            {
+                "name": "H",
+                "species": [{"element": "H", "occu": 1}],
+                "xyz": [0.0, 0.0, 0.0],
+                "properties": {},
+            }
+        ]
         task_doc_clean["output"]["mulliken"] = [+1.000000]
         task_doc_clean["output"]["resp"] = [+1.000000]
         task_doc_clean["output"]["optimized_molecule"]["charge"] = 1
         task_doc_clean["output"]["optimized_molecule"]["spin_multiplicity"] = 1
-        task_doc_clean["output"]["optimized_molecule"]["sites"] = [{'name': 'H', 'species': [{'element': 'H', 'occu': 1}], 'xyz': [0.0, 0.0, 0.0], 'properties': {}}]
+        task_doc_clean["output"]["optimized_molecule"]["sites"] = [
+            {
+                "name": "H",
+                "species": [{"element": "H", "occu": 1}],
+                "xyz": [0.0, 0.0, 0.0],
+                "properties": {},
+            }
+        ]
         task_doc_clean["output"]["final_energy"] = (
             task_doc_2["output"]["final_energy"] - task_doc_1["output"]["final_energy"]
         )

diff --git a/atomate/qchem/firetasks/tests/test_parse_outputs.py b/atomate/qchem/firetasks/tests/test_parse_outputs.py
@@ -0,0 +1,85 @@
+import os
+import shutil
+import unittest
+
+from pymatgen.core import Molecule
+from pymatgen.io.qchem.inputs import QCInput
+
+from atomate.qchem.firetasks.parse_outputs import QChemToDb
+from atomate.utils.testing import AtomateTest
+
+from monty.serialization import loadfn
+
+__author__ = "Samuel Blau"
+__email__ = "samblau1@gmail.com"
+
+module_dir = os.path.dirname(os.path.abspath(__file__))
+
+
+class TestParseOutputQChem_grads(AtomateTest):
+    def setUp(self, lpad=False):
+        super().setUp(lpad=False)
+
+    def tearDown(self):
+        pass
+
+    def test_parse_grad_good(self):
+        my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","parse_grad_good")
+        ft = QChemToDb(calc_dir=my_calc_dir, parse_grad_file=True)
+        ft.run_task({})
+        task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
+        self.assertEqual(task_doc["output"]["final_energy"], -274.6893362188)
+        self.assertEqual(len(task_doc["output"]["precise_gradients"]), 10)
+        self.assertEqual(task_doc["output"]["precise_gradients"][0], [0.0090906486788787, 0.016150932052898, 0.0054568671405536])
+        self.assertEqual(task_doc["output"]["precise_gradients"][-1], [0.0014495621906601, -0.0018570062958895, 0.0012478282193499])
+        os.remove(os.path.join(my_calc_dir, "task.json"))
+
+    def test_parse_grad_131(self):
+        my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","tmqm_grad_pcm")
+        ft = QChemToDb(calc_dir=my_calc_dir, parse_grad_file=True)
+        ft.run_task({})
+        task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
+        self.assertEqual(task_doc["output"]["final_energy"], -2791.8404057999)
+        self.assertEqual(len(task_doc["output"]["precise_gradients"]), 25)
+        self.assertEqual(task_doc["output"]["precise_gradients"][0], [-2.7425178677332305e-05, 1.8017443772144412e-06, -2.3689773769176685e-06])
+        self.assertEqual(task_doc["output"]["precise_gradients"][-1], [0.0028753270363098644, -0.000392640066359285, 0.004405091870637312])
+        os.remove(os.path.join(my_calc_dir, "task.json"))
+
+    def test_parse_grad_bad(self):
+        my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","parse_grad_bad")
+        ft = QChemToDb(calc_dir=my_calc_dir, parse_grad_file=True)
+        ft.run_task({})
+        task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
+        self.assertEqual(task_doc["warnings"]["grad_file_missing"], True)
+        os.remove(os.path.join(my_calc_dir, "task.json"))
+
+
+class TestParseOutputQChem_hess(AtomateTest):
+    def setUp(self, lpad=False):
+        os.makedirs(os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "scratch"))
+        shutil.copyfile(
+            os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "BUP_scratch", "132.0"),
+            os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "scratch", "132.0"),
+        )
+        shutil.copyfile(
+            os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "BUP_scratch", "HESS"),
+            os.path.join(module_dir, "..", "..", "test_files", "freq_save_hess", "scratch", "HESS"),
+        )
+        super().setUp(lpad=False)
+
+    def test_parse_hess(self):
+        my_calc_dir = os.path.join(module_dir, "..", "..", "test_files","freq_save_hess")
+        ft = QChemToDb(calc_dir=my_calc_dir, parse_hess_file=True)
+        ft.run_task({})
+        task_doc = loadfn(os.path.join(my_calc_dir,"task.json"))
+        self.assertEqual(task_doc["output"]["final_energy"], -151.3244603665)
+        self.assertEqual(task_doc["output"]["hess_data"]["scratch/132.0"][0],0.12636293260949633)
+        self.assertEqual(task_doc["output"]["hess_data"]["scratch/132.0"][-2],-0.2025032138024329)
+        self.assertEqual(task_doc["output"]["hess_data"]["scratch/HESS"][-2], ' -0.175476533300377      -0.202503213802433       0.205623571433770     \n')
+        os.remove(os.path.join(my_calc_dir, "task.json"))
+
+    def tearDown(self):
+        pass
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/atomate/qchem/firetasks/tests/test_run_calc.py b/atomate/qchem/firetasks/tests/test_run_calc.py
@@ -543,10 +543,14 @@ def test_RunQChemFake(self):
         ).data
         this_out = QCOutput("mol.qout").data
         for key in ref_out:
-            try:
-                self.assertEqual(ref_out[key], this_out[key])
-            except ValueError:
-                np.testing.assert_array_equal(ref_out[key], this_out[key])
+            if key == "dipoles":
+                self.assertEqual(ref_out[key]["total"], this_out[key]["total"])
+                np.testing.assert_array_equal(ref_out[key]["dipole"], this_out[key]["dipole"])
+            else:
+                try:
+                    self.assertEqual(ref_out[key], this_out[key])
+                except ValueError:
+                    np.testing.assert_array_equal(ref_out[key], this_out[key])
         for filename in os.listdir(
             os.path.join(module_dir, "..", "..", "test_files", "real_run")
         ):