Source code for surfaxe.vasp_data

# Pymatgen  
from pymatgen.core import Structure
from pymatgen.io.vasp.outputs import Locpot, Outcar, Vasprun
from pymatgen.analysis.local_env import CrystalNN

# Misc
import os
import pandas as pd 
import numpy as np 
import warnings 

# surfaxe 
from surfaxe.generation import oxidation_states
from surfaxe.io import _custom_formatwarning, slab_from_file, _instantiate_structure

[docs]def process_data(bulk_per_atom, parse_hkl=True, path_to_fols=None, hkl_dict=None, parse_core_energy=False, core_atom=None, bulk_nn=None, parse_vacuum=False, save_csv=True, csv_fname='data.csv', **kwargs): """ Parses the folders to collect all final data on relevant input and output parameters, and optionally core and vacuum level energies. If you are processing data for folder structures generated with `generation` make sure you use `convergence.parse_fols` function. This function is for parsing full sets of information from the output of production run calculations. The folder structure for parsing of data is fairly flexible and can be: 1. automatically parsed if `parse_hkl=True` - the function searches for folders with names three digits long in cwd (default) e.g. it finds folders cwd/100 and cwd/010 that correspond to Miller indices (1,0,0) and (0,1,0) 2. automatically parsed from a specific working directory if `path_to_fols` is specified 3. manually specified using `hkl_dict`, where the Miller index is mapped directly to the path to where the files are. If you are only interested in the specified folders, do not forget to change `parse_hkl=False`. e.g. hkl_dict = {(0,1,1): 'path/to/001/files/', (2,0,1): 'path/to/201/files/'} 4. automatically parsed from cwd or a specific working directory in addition to a defined `hkl_dict` Each of the folders must contain POSCAR and vasprun.xml files and optionally LOCPOT (or potential.csv) and OUTCAR files if vacuum or core energy are parsed. The function returns None by default and saves the DataFrame to a csv file. Optionally, it can return the DataFrame. Args: bulk_per_atom (`float`): Bulk energy per atom in eV per atom. parse_hkl (`bool`, optional): If ``True`` the script parses the names of the folders to get the Miller indices. Defaults to ``True``. path_to_fols (`str`, optional): Path to where surfaxe should look for the hkl folders are. Defaults to None which searches in cwd. hkl_dict (`dict`, optional): dictionary of tuples of Miller indices and paths to the folders the relevant outputs. Defaults to ``None``. E.g. If the outputs of the calculations on the (1,-1,2) slab are in folder ``path/to/folder/112``, the ``hkl_dict`` would be: {(1,-1,2): 'path/to/folder/112'} parse_core_energy (`bool`, optional): If True the scripts attempts to parse core energies from a supplied OUTCAR. Defaults to ``False``. core_atom (`str`, optional): The symbol of atom the core state energy level should be parsed from. Defaults to ``None``. bulk_nn (`list`, optional): The symbols of the nearest neighbours of the `core_atom`. Defaults to ``None``. parse_vacuum (`bool`, optional): if ``True`` the script attempts to parse LOCPOT using analysis.electrostatic_potential to use the maximum value of planar potential as the vacuum energy level. Defaults to ``True``. save_csv (`bool`, optional): If ``True``, it writes data to a csv file. Defaults to ``True``. csv_fname (`str`, optional): The filename of the csv. Defaults to data.csv Returns: DataFrame """ # Check if hkl_dict is correctly set up if hkl_dict: for key, value in hkl_dict.items(): if not isinstance(key, tuple): raise TypeError('The keys supplied to hkl_dict are not tuples.') if not isinstance(value, str): raise TypeError('The values supplied to hkl_dict are not strings.') cwd = os.getcwd() if path_to_fols: cwd = path_to_fols # Get the Miller indices as tuples and strings from folders in root dir if parse_hkl: if not hkl_dict: hkl_dict = {} for fol in os.listdir(cwd): if os.path.isdir(os.path.join(cwd, fol)) and len(fol)==3 and\ fol.isdigit(): hkl_dict[tuple(map(int, fol))] = os.path.join(cwd, fol) # Set up additional arguments for get_core_energy get_core_energy_kwargs = {'orbital': '1s', 'ox_states': None, 'nn_method': CrystalNN()} get_core_energy_kwargs.update( (k, kwargs[k]) for k in get_core_energy_kwargs.keys() & kwargs.keys() ) get_core=False if parse_core_energy: if core_atom is not None and bulk_nn is not None: get_core=True else: warnings.formatwarning = _custom_formatwarning warnings.warn(('Core atom or bulk nearest neighbours were not ' 'supplied. Core energy will not be parsed.')) # For each miller index, check if the folders specified are there and # parse them for data df_list, electrostatic_list, core_energy_list = ([] for i in range(3)) for hkl_tuple, path in hkl_dict.items(): vsp_path = '{}/vasprun.xml'.format(path) psc_path = '{}/POSCAR'.format(path) vsp = Vasprun(vsp_path, parse_potcar_file=False) slab = slab_from_file(psc_path,hkl_tuple) vsp_dict = vsp.as_dict() df_list.append({ 'hkl': ''.join(map(str, hkl_tuple)), 'hkl_tuple': hkl_tuple, 'area': slab.surface_area, 'atoms': vsp_dict['nsites'], 'functional': vsp_dict['run_type'], 'encut': vsp_dict['input']['incar']['ENCUT'], 'algo': vsp_dict['input']['incar']['ALGO'], 'ismear': vsp_dict['input']['parameters']['ISMEAR'], 'sigma': vsp_dict['input']['parameters']['SIGMA'], 'kpoints': vsp_dict['input']['kpoints']['kpoints'], 'bandgap': vsp_dict['output']['bandgap'], 'slab_energy': vsp_dict['output']['final_energy'], 'slab_per_atom': vsp_dict['output']['final_energy_per_atom'] }) if parse_vacuum: electrostatic_list.append( vacuum(path) ) if get_core: otc_path='{}/OUTCAR'.format(path) core_energy_list.append( core_energy(core_atom, bulk_nn, outcar=otc_path, structure=psc_path, **get_core_energy_kwargs) ) df = pd.DataFrame(df_list) df['surface_energy'] = ( (df['slab_energy'] - bulk_per_atom * df['atoms'])/(2*df['area']) * 16.02 ) df['surface_energy_ev'] = ( (df['slab_energy'] - bulk_per_atom * df['atoms'])/(2*df['area']) ) if electrostatic_list: df['vacuum_potential'] = electrostatic_list if core_energy_list: df['core_energy'] = core_energy_list # Save to csv or return DataFrame if save_csv: if not csv_fname.endswith('.csv'): csv_fname += '.csv' df.to_csv(csv_fname, header=True, index=False) else: return df
[docs]def vacuum(path=None): ''' Gets the energy of the vacuum level. It either parses potential.csv file if available or tries to calculate planar potential from LOCPOT. If neither file is available, function returns np.nan. Args: path (`str`, optional): the path to potential.csv or LOCPOT files. Can be the path to a directory in which either file is or you can specify a path that must end in .csv or contain LOCPOT. Defaults to looking for potential.csv or LOCPOT in cwd. Returns: Maximum value of planar potential ''' if type(path)==str and path.endswith('.csv'): df = pd.read_csv(path) max_potential = df['planar'].max() max_potential = round(max_potential, 3) elif type(path)==str and 'LOCPOT' in path: lpt = Locpot.from_file(path) planar = lpt.get_average_along_axis(2) max_potential = float(f"{np.max(planar): .3f}") else: if path is None: cwd = os.getcwd() else: cwd = path if os.path.isfile('{}/potential.csv'.format(cwd)): df = pd.read_csv('{}/potential.csv'.format(cwd)) max_potential = df['planar'].max() max_potential = round(max_potential, 3) elif os.path.isfile('{}/LOCPOT'.format(cwd)): lpt = Locpot.from_file('{}/LOCPOT'.format(cwd)) planar = lpt.get_average_along_axis(2) max_potential = float(f"{np.max(planar): .3f}") else: max_potential = np.nan warnings.formatwarning = _custom_formatwarning warnings.warn('Vacuum electrostatic potential was not parsed from {} ' 'no LOCPOT or potential.csv files were provided.'.format(path)) return max_potential
[docs]def core_energy(core_atom, bulk_nn, orbital='1s', ox_states=None, nn_method=CrystalNN(), outcar='OUTCAR', structure='POSCAR'): """ Parses the structure and OUTCAR files for the core level energy. Check the validity of nearest neighbour method on the bulk structure before using it on slabs. Args: core_atom (`str`, optional): The symbol of atom the core state energy level should be parsed from. bulk_nn (`list`, optional): The symbols of the nearest neighbours of the `core_atom`. orbital (`str`, optional): The orbital of core state. Defaults to 1s. ox_states (``None``, `list` or `dict`, optional): Add oxidation states to the structure. Different types of oxidation states specified will result in different pymatgen functions used. The options are: * if supplied as ``list``: The oxidation states are added by site e.g. ``[3, 2, 2, 1, -2, -2, -2, -2]`` * if supplied as ``dict``: The oxidation states are added by element e.g. ``{'Fe': 3, 'O':-2}`` * if ``None``: The oxidation states are added by guess. Defaults to ``None``. nn_method (`class` instance, optional): The coordination number algorithm used. Because the ``nn_method`` is a class, the class needs to be imported from pymatgen.analysis.local_env before it can be instantiated here. Defaults to ``CrystalNN()``. outcar (`str`, optional): Path to the OUTCAR file. Defaults to ``./OUTCAR``. structure (`str`, optional): Path to the structure file in any format supported by pymatgen. Defaults to ``./POSCAR``. Can also accept a pymaten.core.Structure object directly. Returns: Core state energy """ struc = _instantiate_structure(structure) struc = oxidation_states(struc, ox_states) bonded_struc = nn_method.get_bonded_structure(struc) bulk_nn.sort() bulk_nn_str = ' '.join(bulk_nn) # Get the nearest neighbours info, the c-coordinate and index number # for each atom list_of_dicts = [] for n, pos in enumerate(struc): if pos.specie.symbol == core_atom: nn_info = bonded_struc.get_connected_sites(n) slab_nn_list = [] for d in nn_info: nn = d.site.specie.symbol slab_nn_list.append(nn) slab_nn_list.sort() slab_nn = ' '.join(slab_nn_list) list_of_dicts.append({ 'atom': n, 'nn': slab_nn, 'c_coord': pos.c }) # Make pandas Dataframe, query the interquartile range of fractional # coordinates of atoms whose nearest neighbour environment is same as the # bulk nearest neighbours provided, get the atom that is the nearest to the # median of the interquartile range df = pd.DataFrame(list_of_dicts) low, high = df['c_coord'].quantile([0.25,0.75]) df = df.query('@low<c_coord<@high and nn==@bulk_nn_str') atom = df['atom'].quantile(interpolation='nearest') # Check if the df from query isn't empty - if it is it returns # a nan as core energy, otherwise it attempts to extract from OUTCAR if type(atom) is np.float64: core_energy = np.nan else: # Read OUTCAR, get the core state energy otc = Outcar(outcar) core_energy_dict = otc.read_core_state_eigen() try: core_energy = core_energy_dict[atom][orbital][-1] except IndexError: core_energy = np.nan return core_energy