Source code for vasppy.summary

# Summary class and helper methods
# Used for summarising VASP calculations as YAML

from pymatgen.io.vasp.outputs import Vasprun  # type: ignore
from pymatgen.analysis.transition_state import NEBAnalysis  # type: ignore
from vasppy.vaspmeta import VASPMeta
from vasppy.outcar import (
    final_energy_from_outcar,
    vasp_version_from_outcar,
    potcar_eatom_list_from_outcar,
)
from vasppy.data.potcar_data import potcar_md5sum_data
from vasppy.utils import file_md5, md5sum, match_filename, cd
from xml.etree import ElementTree as ET
import yaml
import glob
import re

# Names of the pseudopotential data sets that `potcar_spec` searches when
# identifying POTCAR entries by md5 hash. The order is significant: the sets
# are scanned in this order, and a later match overwrites an earlier one.
potcar_sets = (
    "PBE PBE_52 PBE_54 PBE_54r "
    "LDA_54r LDA LDA_52 LDA_54 "
    "GGA USPP_GGA USPP_LDA"
).split()


def load_vasp_summary(filename):
    """
    Reads a `vasp_summary.yaml` format YAML file and returns
    a dictionary of dictionaries, keyed by each document's `title`.

    Every YAML document in the file becomes one sub-dictionary. If two
    documents share a title, the later one replaces the earlier one.

    Example:
        The file::

            ---
            title: foo
            data: foo_data
            ---
            title: bar
            data: bar_data

        is converted to the dictionary::

            { 'foo': { 'title': 'foo', 'data': 'foo_data' },
              'bar': { 'title': 'bar', 'data': 'bar_data' } }

    Args:
        filename (str): File path for the `vasp_summary.yaml` file.

    Returns:
        (dict(str: dict)): The separate YAML documents, each as a dictionary.
    """
    summaries = {}
    with open(filename, "r") as stream:
        # The documents are consumed while the file is still open.
        for document in yaml.load_all(stream, Loader=yaml.SafeLoader):
            summaries[document["title"]] = document
    return summaries
def potcar_spec(filename, return_hashes=False):
    """
    Returns a dictionary specifying the pseudopotentials contained in a POTCAR file.

    The file is split into its component pseudopotentials at each
    "End of Dataset" line. Each component is hashed with `md5sum` and looked
    up in `potcar_md5sum_data` for every data set named in `potcar_sets`.

    Args:
        filename (str): The name of the POTCAR file to process.
        return_hashes (bool): If True the return dictionary values will be the
            md5 hashes of the component pseudopotential files, instead of the
            data set names. Default is False.

    Returns:
        (Dict): A dictionary of pseudopotential filename: dataset pairs,
            e.g. {'Fe_pv': 'PBE_54', 'O': 'PBE_54'}

    Raises:
        ValueError: If any component pseudopotential has an md5 hash that is
            not found in any of the known data sets.
    """
    p_spec = {}
    with open(filename, "r") as f:
        # The capturing group makes re.split keep each terminator line, so the
        # list alternates [body, terminator, body, terminator, ...]. Empty
        # strings (e.g. after the final terminator) are dropped.
        potcars = [s for s in re.split("(End of Dataset\n)", f.read()) if s]
    # Re-join every body with its terminator before hashing.
    potcar_md5sums = [
        md5sum("".join(pair)) for pair in zip(potcars[::2], potcars[1::2])
    ]
    for this_md5sum in potcar_md5sums:
        matched = False
        for ps in potcar_sets:
            for p, p_md5sum in potcar_md5sum_data[ps].items():
                if this_md5sum == p_md5sum:
                    # A later data set overwrites an earlier one for the same
                    # pseudopotential name.
                    p_spec[p] = this_md5sum if return_hashes else ps
                    matched = True
        # Check each hash individually. Comparing len(p_spec) with the number
        # of hashes is unreliable: a POTCAR that repeats a pseudopotential
        # collapses to one dictionary key, and a hash found under two names
        # could hide another hash that was not found at all.
        if not matched:
            raise ValueError("One or more POTCARs did not have matching md5 hashes")
    return p_spec
def find_vasp_calculations():
    """
    Returns a list of all subdirectories that contain either a vasprun.xml file
    or a compressed vasprun.xml.gz file.

    The search is recursive and starts from the current working directory.
    Each entry is the path of the containing directory, prefixed with "./"
    and ending with the path separator; a file found directly in the current
    directory gives "./".

    Args:
        None

    Returns:
        (List): list of all VASP calculation subdirectories. Directories found
            via vasprun.xml come first, followed by those found via
            vasprun.xml.gz.
    """

    def _directories_containing(basename):
        # Strip only the trailing filename. A substitution over the whole path
        # would also mangle directory names that happen to contain the
        # filename, e.g. "vasprun.xml.bak/".
        return [
            "./" + path[: -len(basename)]
            for path in glob.iglob("**/" + basename, recursive=True)
        ]

    return _directories_containing("vasprun.xml") + _directories_containing(
        "vasprun.xml.gz"
    )
class Summary:
    """
    Summary of a single VASP calculation, printed to stdout as YAML-style text.

    A Summary combines the metadata read from `vaspmeta.yaml` (`self.meta`)
    with the parsed `vasprun.xml` (`self.vasprun`) found in one calculation
    directory. Each supported flag has a matching `print_*` method, and
    `output` calls the methods for a requested list of flags.

    Attributes:
        directory (str): The calculation directory passed to the constructor.
        meta (VASPMeta): Metadata loaded from `vaspmeta.yaml`.
        vasprun_filename (str): The vasprun file name returned by `match_filename`.
        vasprun (Vasprun or None): The parsed vasprun data, or None if the XML
            could not be parsed.
        print_methods (dict): Maps each supported flag to its bound print method.
    """

    # Flag name -> human-readable description. The keys must match the keys of
    # `print_methods`; this is checked in `__init__`.
    supported_flags = {
        "title": "Title",
        "description": "Description",
        "notes": "Notes",
        "type": "Type",
        "status": "Status",
        "stoichiometry": "Stoichiometry",
        "potcar": "POTCAR",
        "eatom": "POTCAR EATOM values",
        "plus_u": "Dudarev +U parameters",
        "energy": "Energy",
        "lreal": "LREAL",
        "k-points": "k-points",
        "functional": "functional",
        "encut": "encut",
        "ediffg": "ediffg",
        "ibrion": "ibrion",
        "converged": "converged",
        "md5": "md5",
        "directory": "directory",
        "vbm": "Vasprun valence band maximum",
        "cbm": "Vasprun conduction band minimum",
        "track": "tracking for files",
        "version": "VASP executable version",
        "nelect": "NELECT",
    }

    def __init__(self, directory="."):
        """
        Load the metadata and vasprun data for one calculation directory.

        Args:
            directory (str): Path of the calculation directory. Default is ".".

        Raises:
            FileNotFoundError: If `vaspmeta.yaml` or a vasprun file is missing.
            ValueError: If `print_methods` and `supported_flags` do not have
                exactly the same keys.
        """
        self.directory = directory
        # Both files are looked up by bare filename, so they are read from
        # inside the `cd(directory)` context.
        with cd(directory):
            try:
                self.meta = VASPMeta.from_file("vaspmeta.yaml")
            except FileNotFoundError as exc:
                raise FileNotFoundError(
                    f"vaspmeta.yaml not found in {directory}"
                ) from exc
            self.parse_vasprun()
        self.print_methods = {
            "title": self.print_title,
            "description": self.print_description,
            "notes": self.print_notes,
            "type": self.print_type,
            "status": self.print_status,
            "stoichiometry": self.print_stoichiometry,
            "potcar": self.print_potcar,
            "eatom": self.print_eatom,
            "energy": self.print_energy,
            "k-points": self.print_kpoints,
            "functional": self.print_functional,
            "encut": self.print_encut,
            "plus_u": self.print_plus_u,
            "ediffg": self.print_ediffg,
            "ibrion": self.print_ibrion,
            "converged": self.print_converged,
            "version": self.print_version,
            "md5": self.print_vasprun_md5,
            "directory": self.print_directory,
            "lreal": self.print_lreal,
            "vbm": self.print_vbm,
            "cbm": self.print_cbm,
            "track": self.print_file_tracking,
            "nelect": self.print_nelect,
        }
        # Guard against the two flag tables drifting apart. The mismatch is
        # reported in the exception itself rather than printed to stdout,
        # which is where the YAML summary is written.
        mismatched = set(self.print_methods) ^ set(self.supported_flags)
        if mismatched:
            raise ValueError(
                "print_methods and supported_flags disagree on: {}".format(
                    ", ".join(sorted(mismatched))
                )
            )

    def parse_vasprun(self):
        """
        Read in `vasprun.xml` as a pymatgen Vasprun object.

        Sets `self.vasprun_filename` and `self.vasprun`. If the vasprun.xml is
        not well formed, the ParseError is caught and `self.vasprun` is set
        to None. Any other exception propagates to the caller.

        Args:
            None

        Returns:
            None

        Raises:
            FileNotFoundError: If `match_filename` finds no vasprun file.
        """
        self.vasprun_filename = match_filename("vasprun.xml")
        if not self.vasprun_filename:
            raise FileNotFoundError("Could not find vasprun.xml or vasprun.xml.gz file")
        try:
            self.vasprun = Vasprun(
                self.vasprun_filename, parse_potcar_file=False, parse_dos=False
            )
        except ET.ParseError:
            self.vasprun = None

    @property
    def stoich(self):
        """
        Element amounts for the final structure.

        Returns:
            (dict): The element: amount dictionary of the final structure's
                composition.
        """
        return self.vasprun.final_structure.composition.get_el_amt_dict()

    @property
    def functional(self):
        """
        String description of the calculation functional.

        Returns:
            (Str): String describing the calculation functional.
        """
        return self.vasprun.run_type

    def potcars_are_pbe(self):
        """
        Check whether every POTCAR symbol contains the string "PBE".

        Returns:
            (bool): True if all POTCAR symbols contain "PBE".
        """
        return all("PBE" in s for s in self.vasprun.potcar_symbols)

    def output(self, to_print):
        """
        Print one summary document: a "---" separator, then each requested field.

        If the vasprun data could not be parsed (`self.vasprun` is None), only
        the title, type, and status are printed, whatever was requested.

        Args:
            to_print (list(str)): Flags to print; each must be a key of
                `print_methods`.

        Returns:
            None
        """
        if not self.vasprun:
            to_print = ["title", "type", "status"]
        print("---")
        for p in to_print:
            self.print_methods[p]()
        print("", flush=True)

    def print_type(self):
        """Print the metadata type, if one is set."""
        if self.meta.type:
            print("type: {}".format(self.meta.type))

    def print_title(self):
        """Print the metadata title."""
        print("title: {}".format(self.meta.title))

    def print_description(self):
        """Print the metadata description with surrounding whitespace stripped."""
        print("description: {}".format(self.meta.description.strip()))

    def print_notes(self):
        """Print the metadata notes, or a YAML null (`~`) if there are none."""
        if self.meta.notes:
            print("notes: {}".format(self.meta.notes.strip()))
        else:
            print("notes: ~")

    def print_status(self):
        """Print the metadata status."""
        print("status: {}".format(self.meta.status))

    def print_lreal(self):
        """Print the LREAL value from the vasprun parameters."""
        print("lreal: {}".format(self.vasprun.parameters["LREAL"]))

    def print_stoichiometry(self):
        """Print each element with its amount, truncated to an integer."""
        print("stoichiometry:")
        for element in self.stoich:
            print(" - {}: {}".format(element, int(self.stoich[element])))

    def print_potcar(self):
        """Print each element paired with its POTCAR symbol."""
        print("potcar:")
        # Pairs by position: assumes the elements in `stoich` are in the same
        # order as the POTCAR symbols. TODO confirm.
        for e, p in zip(self.stoich, self.vasprun.potcar_symbols):
            print(" - {}: {}".format(e, p))

    def print_energy(self):
        """
        Print the energy in the form that suits the metadata type.

        With no type set, the final energy from the vasprun data is printed.
        For type "neb", the NEB image energies are printed instead.

        Raises:
            ValueError: If the metadata type is set to anything other than "neb".
        """
        # if this gets more options, it might be a good idea to set the
        # appropriate method using a dictionary?
        # or we could subclass Summary --> NEB_Summary ?
        if not self.meta.type:
            print("energy: {}".format(self.vasprun.final_energy))
        elif self.meta.type == "neb":
            self.print_neb_energy()
        else:
            raise ValueError("VASPMeta type not supported: {}".format(self.meta.type))

    def print_neb_energy(self):
        """Print the image 00 reference energy and the energy of every NEB image."""
        # NOTE(review): these paths are relative to the current working
        # directory, not to self.directory as in print_version / print_eatom.
        # Confirm this is intended.
        image_00_energy = final_energy_from_outcar("00/OUTCAR")
        print("reference energy: {} eV".format(image_00_energy))
        neb = NEBAnalysis.from_dir(".")
        print("neb image energies:")
        for i, e in enumerate(neb.energies):
            print(" - {:02d}: {:10.6f} eV".format(i, e))

    def print_version(self):
        """Print the first whitespace-separated token of the OUTCAR version string."""
        version_string = vasp_version_from_outcar(
            "{}/OUTCAR".format(self.directory)
        ).split()[0]
        print("version: {}".format(version_string))

    def print_eatom(self):
        """Print each element paired with its POTCAR EATOM value from the OUTCAR."""
        # This is one way to try to uniquely identify the POTCARs used, because the
        # potcar_symbol (e.g. `Ti_pv 07Sep2000`) is not sufficient.
        print("eatom:")
        for e, eatom in zip(
            self.stoich, potcar_eatom_list_from_outcar("{}/OUTCAR".format(self.directory))
        ):
            print(" - {}: {} eV".format(e, eatom))

    def print_kpoints(self):
        """Print the k-point scheme and the first entry of the k-points list."""
        print("k-points:")
        print(" scheme: {}".format(self.vasprun.kpoints.style))
        print(
            " grid: {}".format(
                " ".join(str(k) for k in self.vasprun.kpoints.kpts[0])
            )
        )

    def print_functional(self):
        """Print the calculation functional."""
        print("functional: {}".format(self.functional))

    def print_ibrion(self):
        """Print the IBRION value from the INCAR data."""
        print("ibrion: {}".format(self.vasprun.incar["IBRION"]))

    def print_ediffg(self):
        """Print the EDIFFG value from the INCAR data."""
        print("ediffg: {}".format(self.vasprun.incar["EDIFFG"]))

    def print_encut(self):
        """Print ENCUT from the INCAR data, else ENMAX; print nothing if neither is set."""
        if "ENCUT" in self.vasprun.incar:
            print("encut: {}".format(self.vasprun.incar["ENCUT"]))
        elif "ENMAX" in self.vasprun.incar:
            print("encut: {}".format(self.vasprun.incar["ENMAX"]))

    def print_converged(self):
        """Print the convergence flag reported by the vasprun data."""
        print("converged: {}".format(self.vasprun.converged))

    def print_vasprun_md5(self):
        """Print the md5 hash of the vasprun file in the calculation directory."""
        print(
            "vasprun md5: {}".format(
                file_md5("{}/{}".format(self.directory, self.vasprun_filename))
            )
        )

    def print_file_tracking(self):
        """
        Print the reported filename and md5 hash for every tracked file.

        `self.meta.track` maps a file name in the calculation directory to the
        name it should be reported under; an empty value means "use the
        original name". A file that cannot be found is given the md5 "null".
        Nothing is printed if no files are tracked.
        """
        if self.meta.track:
            print("file tracking:")
            for f, new_filename in self.meta.track.items():
                print(" {}:".format(f))
                if not new_filename:
                    new_filename = f
                # `filename` and `md5` are indented deeper than the tracked-file
                # key so that they nest under it in the YAML output.
                print("   filename: {}".format(new_filename))
                # Join with an explicit separator, as the other methods do, so
                # the lookup also works when the directory has no trailing
                # slash (e.g. the default ".").
                filename = match_filename("{}/{}".format(self.directory, f))
                if filename:
                    md5 = file_md5(filename)
                else:
                    md5 = "null"
                print("   md5: {}".format(md5))

    def print_directory(self):
        """Print the calculation directory."""
        print("directory: {}".format(self.directory))

    def print_plus_u(self):
        """
        Print the +U parameters for every element with a non-zero U value.

        Nothing is printed unless LDAUU is in the INCAR data and at least one
        of its values is non-zero. Each line gives the element, the orbital
        letter for its LDAUL value, and its U and J values.
        """
        if "LDAUU" in self.vasprun.incar:
            # Angular momentum quantum number -> orbital letter.
            lqn = {0: "s", 1: "p", 2: "d", 3: "f"}
            ldauu = self.vasprun.incar["LDAUU"]
            ldauj = self.vasprun.incar["LDAUJ"]
            ldaul = self.vasprun.incar["LDAUL"]
            if any(v != 0 for v in ldauu):
                print("ldau:")
                for e, u, j, l in zip(self.stoich, ldauu, ldauj, ldaul):
                    if u != 0:
                        print(" - {}: {} {} {}".format(e, lqn[l], u, j))

    def print_cbm(self):
        """Print the conduction band minimum."""
        # pymatgen documents eigenvalue_band_properties as
        # (band gap, cbm, vbm, is_band_gap_direct).
        print("cbm: {}".format(self.vasprun.eigenvalue_band_properties[1]))

    def print_vbm(self):
        """Print the valence band maximum."""
        # pymatgen documents eigenvalue_band_properties as
        # (band gap, cbm, vbm, is_band_gap_direct).
        print("vbm: {}".format(self.vasprun.eigenvalue_band_properties[2]))

    def print_nelect(self):
        """Print the NELECT value from the vasprun parameters."""
        print("nelect: {}".format(self.vasprun.parameters["NELECT"]))