mlatom.interfaces.gaussian_interface 源代码

#!/usr/bin/env python3
'''
.. code-block::

  !---------------------------------------------------------------------------! 
  ! gaussian: interface to the Gaussian program                               ! 
  ! Implementations by: Pavlo O. Dral, Peikun Zheng, Yi-Fan Hou               !
  !---------------------------------------------------------------------------! 
'''

import os, sys, subprocess
import numpy as np
from mlatom import constants
from mlatom import data
from mlatom import models
from mlatom.decorators import doc_inherit

[文档] class gaussian_methods(models.model, metaclass=models.meta_method): ''' Gaussian interface Arguments: method (str): Method to use chkfilename (bool, str): request to create chk file defined by %chk in Gaussian (default: None). If True, assignes default filename. memory (str): equivalent to %mem in Gaussian input file, e.g., memory=16Gb (default: None). nthreads (int): equivalent to %proc in Gaussian input file. gaussian_keywords (str): any gaussian keywords in addition to method. additional_input (str): any additional input to be appended to the Gaussian input file. save_files_in_current_directory (bool): whether to keep input and output files, default ``'False'``. working_directory (str): path to the directory where the program output files and other tempory files are saved, default ``'None'``. .. note:: The format of method should be the same as that in Gaussian, e.g., ``'B3LYP/6-31G*'`` ''' def __init__(self, method='B3LYP/6-31G*', gaussian_keywords=None, additional_input='', chkfilename=None, memory=None, nthreads=None,save_files_in_current_directory=False, working_directory=None, ): if not "GAUSS_EXEDIR" in os.environ: raise ValueError('enviromental variable GAUSS_EXEDIR is not set') self.method = method self.gaussian_keywords = gaussian_keywords self.additional_input = additional_input self.chkfilename = chkfilename if nthreads is None: from multiprocessing import cpu_count self.nthreads = cpu_count() else: self.nthreads = nthreads self.memory = memory self.save_files_in_current_directory = save_files_in_current_directory self.working_directory = working_directory
[文档] @doc_inherit def predict(self,molecular_database=None,molecule=None, nstates=1, current_state=0, calculate_energy=True, calculate_energy_gradients=False, calculate_hessian=False, calculate_dipole_derivatives=False, gaussian_keywords='',): ''' nstates (int): number of electronic structure states (default: 1, ground state) current_state (int): default is the ground state (for nstates=1) or the first excited state (nstates > 1) gaussian_keywords (str): any gaussian keywords. ''' molDB = super().predict(molecular_database=molecular_database, molecule=molecule) method = self.method #self.energy_to_read = energy_to_read self.calculate_energy_gradients = calculate_energy_gradients self.calculate_energy = calculate_energy self.calculate_hessian = calculate_hessian self.calculate_dipole_derivatives = calculate_dipole_derivatives self.gaussian_keywords = gaussian_keywords if nstates > 1: if 'TD-'.casefold() in method.casefold(): import re method = re.sub('td-', '', method, flags=re.IGNORECASE) if current_state == 0: current_state = 1 # default in Gaussian method += f' TD(NStates={nstates-1},Root={current_state})' if calculate_energy_gradients: if not calculate_hessian: if 'force'.casefold() in method.casefold(): pass else: method += ' force(nostep)' if calculate_hessian: self.chkfilename = True if 'freq'.casefold() in method.casefold(): pass else: method += ' freq' if calculate_dipole_derivatives: method += ' IOp(7/33=1)' import tempfile, subprocess with tempfile.TemporaryDirectory() as tmpdirname: if self.save_files_in_current_directory: tmpdirname = '.' if self.working_directory is not None: tmpdirname = self.working_directory if not os.path.exists(tmpdirname): os.makedirs(tmpdirname) tmpdirname = os.path.abspath(tmpdirname) for imol in range(len(molDB.molecules)): imolecule = molDB.molecules[imol] filename_wo_extension = 'molecule'+str(imol) if self.chkfilename == True: chkfilename=f'{filename_wo_extension}.chk' elif type(self.chkfilename) == str: chkfilename = self.chkfilename else: chkfilename=None # Run Gaussian job run_gaussian_job(filename=f'{filename_wo_extension}.com',molecule=imolecule, method=method, gaussian_keywords=self.gaussian_keywords,additional_input=self.additional_input, chkfilename=chkfilename, memory=self.memory, nthreads=self.nthreads, working_directory=tmpdirname, ) # Read Gaussian output file parse_gaussian_output(filename=os.path.join(tmpdirname,f'{filename_wo_extension}.log'),molecule=imolecule)
def run_gaussian_job(filename=None, molecule=None, reactants=None, products=None, gaussian_keywords='', nthreads=1, memory=None, working_directory='.', # The block below is for using Gaussian for SP calculations method=None, chkfilename=None, # The block below is for using Gaussian as engine for jobs with external potential external_task=None, model_predict_kwargs={}, extra_keywords='', # string with extra keywords additional_input='', opt_keywords = ['nomicro'], frequency_keywords=[], # list with additional arguments such as ['NoRaman',] etc. irc_keywords=['CalcFC'] ): if external_task is not None: pythonbin = sys.executable path_to_this_file=os.path.abspath(__file__) model_predict_kwargs_str = str(model_predict_kwargs) model_predict_kwargs_str_file = 'model_predict_kwargs' with open(os.path.join(working_directory, model_predict_kwargs_str_file), 'w') as f: f.write(model_predict_kwargs_str) external_command = f"external='{pythonbin} {path_to_this_file} {model_predict_kwargs_str_file}'" if external_task.casefold() == 'opt': if len(opt_keywords) > 0: opt_str = ','.join(opt_keywords) if not 'nomicro' in opt_str.casefold(): print("Warning: please specify opt_keywords=['nomicro'] unless you know what you are doing.") gaussian_keywords = f"opt({opt_str})" else: gaussian_keywords = "opt" elif 'freq' in external_task.casefold(): if len(frequency_keywords) != 0: freq_str = ','.join(frequency_keywords) if external_task.casefold() == 'freq(anharmonic)': freq_str = f',{freq_str}' gaussian_keywords = f"freq(anharmonic{freq_str})" elif external_task.casefold() == 'freq': gaussian_keywords = f"freq({freq_str})" else: if external_task.casefold() == 'freq(anharmonic)': gaussian_keywords = f"Freq(anharmonic)" elif external_task.casefold() == 'freq': gaussian_keywords = f"Freq" else: gaussian_keywords = f"{external_task}" elif external_task.casefold() in ['ts', 'qst2', 'qst3']: if len(opt_keywords) > 0: if not 'nomicro' in [kwd.casefold() for kwd in opt_keywords]: print("Warning: please specify opt_keywords=['nomicro'] unless you know what you are doing.") if len(opt_keywords) == 1 and opt_keywords[0].casefold() == 'nomicro': if external_task.casefold() == 'ts': opt_keywords = ['CalcFC', 'noeigen', 'nomicro'] else: opt_keywords = ['CalcFC', 'nomicro'] if not external_task.casefold() in [kwd.casefold() for kwd in opt_keywords]: opt_keywords = [external_task] + opt_keywords else: opt_keywords = ['TS', 'CalcFC', 'noeigen', 'nomicro'] opt_str = ','.join(opt_keywords) gaussian_keywords = f"Opt({opt_str})" elif external_task.casefold() == 'irc': if len(irc_keywords) > 0: irc_str = ','.join(irc_keywords) gaussian_keywords = f"irc({irc_str})" else: gaussian_keywords = "irc" if len(extra_keywords) > 0: gaussian_keywords += f' {extra_keywords}' gaussian_keywords += f'\n{external_command}' else: if method is not None: gaussian_keywords += f'{method}' if not gaussian_keywords.strip()[:1] == '#': if external_task is not None or 'freq' in gaussian_keywords: gaussian_keywords = f'#p {gaussian_keywords}' else: gaussian_keywords = f'# {gaussian_keywords}' if memory is not None: gaussian_keywords = f"%mem={memory}\n" + gaussian_keywords gaussian_keywords = f'%nproc={nthreads}\n' + gaussian_keywords if chkfilename is not None: gaussian_keywords = f'%chk={chkfilename}\n' + gaussian_keywords write_gaussian_input_file(filename=os.path.join(working_directory,filename), molecule=molecule, reactants=reactants, products=products, gaussian_keywords=gaussian_keywords,additional_input=additional_input) Gaussianbin, _ = check_gaussian() proc = subprocess.Popen([Gaussianbin, filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_directory, universal_newlines=True) proc.communicate() def check_gaussian(): status = os.popen('echo $GAUSS_EXEDIR').read().strip() if len(status) != 0: Gaussianroot = status.split('bsd')[0] if 'g16' in Gaussianroot: version = 'g16' elif 'g09' in Gaussianroot: version = 'g09' Gaussianbin = os.path.join(Gaussianroot,version) else: raise ValueError('Cannot find Gaussian software in the environment, set $GAUSS_EXEDIR environmental variable') version = version.replace('g', '') return Gaussianbin, version def write_gaussian_input_file(filename=None, molecule=None, reactants=None, products=None, gaussian_keywords='', additional_input=''): input_string = f'{gaussian_keywords}\n' def mol_str(molecule, title='molecule'): value = '' if 'comment' in molecule.__dict__: if molecule.comment != '': title = molecule.comment elif molecule.id != '': title = molecule.id value += f'\n{title}\n' value += f'\n{molecule.charge} {molecule.multiplicity}\n' value += molecule.get_xyz_string(only_coordinates=True) return value if reactants is not None: input_string += mol_str(reactants, title='reactants') if products is not None: input_string += mol_str(products, title='products') if molecule is not None: input_string += mol_str(molecule) if len(additional_input) > 0: input_string += f'\n{additional_input}' input_string += '\n\n' if filename is None: return input_string with open(filename, 'w') as f: f.writelines(input_string) def gaussian_external(EIn_file, EOu_file, model_predict_kwargs): # write new coordinate into 'xyz_temp.dat' derivs, molecule = read_gaussian_EIn(EIn_file) # calculate energy, gradients, hessian for new coordinates # import json # with open('model.json', 'r') as fjson: # model_dict = json.load(fjson) # if 'method' in model_dict: # kwargs = {} # if 'kwargs' in model_dict: # kwargs = model_dict['kwargs'] # del model_dict['kwargs'] # model = models.methods(**model_dict, **kwargs) model = models.load('model.json') calc_hessian = False if derivs == 2: calc_hessian = True if 'filename' in model_predict_kwargs: if model_predict_kwargs['print_properties'] is not None: printstrs = model._predict_geomopt(molecule=molecule, calculate_energy=True, calculate_energy_gradients=True, calculate_hessian=calc_hessian, **model_predict_kwargs) filename=model_predict_kwargs['filename'] with open(f'{filename}_tmp_out.out', 'a') as ff: ff.writelines(printstrs) else: model._predict_geomopt(molecule=molecule, calculate_energy=True, calculate_energy_gradients=True, calculate_hessian=calc_hessian, **model_predict_kwargs) else: model.predict(molecule=molecule, calculate_energy=True, calculate_energy_gradients=True, calculate_hessian=calc_hessian, **model_predict_kwargs) if not 'energy' in molecule.__dict__: raise ValueError('model did not return any energy') write_gaussian_EOu(EOu_file, derivs, molecule) if os.path.exists('gaussian_freq_mol.json'): molecule.dump(filename='gaussian_freq_mol.json', format='json') def read_gaussian_EIn(EIn_file): molecule = data.molecule() with open(EIn_file, 'r') as fEIn: lines = fEIn.readlines() line0 = lines[0].strip().split() natoms = int(line0[0]); derivs = int(line0[1]) molecule.charge = int(line0[2]); molecule.multiplicity = int(line0[3]) for i in range(1, natoms+1): xx = lines[i].strip().split() atom = data.atom(atomic_number=int(xx[0]), xyz_coordinates=np.array(xx[1:-1]).astype('float')*constants.Bohr2Angstrom) molecule.atoms.append(atom) return derivs, molecule def write_gaussian_EOu(EOu_file, derivs, molecule): import fortranformat with open(EOu_file, 'w') as fEOu: # energy, dipole-moment (xyz) E, Dip(I), I=1,3 if 'dipole_moment' in molecule.__dict__.keys(): dp = molecule.dipole_moment else: dp = [0.0,0.0,0.0] writer = fortranformat.FortranRecordWriter('(4D20.12)') output = writer.write([molecule.energy, dp[0], dp[1], dp[2]]) fEOu.write(output) fEOu.write('\n') writer = fortranformat.FortranRecordWriter('(3D20.12)') # gradient on atom (xyz) FX(J,I), J=1,3; I=1,NAtoms output = writer.write(molecule.get_energy_gradients().flatten()*constants.Bohr2Angstrom) fEOu.write(output) fEOu.write('\n') if derivs == 2: natoms = len(molecule.atoms) # polarizability Polar(I), I=1,6 polor = np.zeros(6) output = writer.write(polor) fEOu.write(output) fEOu.write('\n') # dipole derivatives DDip(I), I=1,9*NAtoms ddip = np.zeros(9*natoms) if 'dipole_derivatives' in molecule.__dict__.keys(): ddip = molecule.dipole_derivatives * constants.Bohr2Angstrom * constants.Debye2au output = writer.write(ddip) fEOu.write(output) fEOu.write('\n') # force constants FFX(I), I=1,(3*NAtoms*(3*NAtoms+1))/2 output = writer.write(molecule.hessian[np.tril_indices(natoms*3)]*constants.Bohr2Angstrom**2) fEOu.write(output) def read_Hessian_matrix_from_Gaussian_chkfile(filename,molecule): natoms = len(molecule.atoms) hessian = np.zeros((3*natoms,3*natoms)) cmdargs = ['chkchk','-p',filename] cmd = f'chkchk -p {filename}' proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, shell=True) outs,errs = proc.communicate() stdout = outs.split('\n') stderr = errs.split('\n') outputs = [] for readable in stdout: outputs.append(readable) flag = None for iline in range(len(outputs)): if 'Input for Opt=FCCards' in outputs[iline]: flag = iline+1 # If there is only 1 atom, chk does not have hessian matrix if flag is None: molecule.hessian = hessian else: flag += 1 + int((3*natoms-0.5)//6 + 1)# Skip energy and forces hessian_len = int(((3*natoms)*(3*natoms+1)//2-0.5)//6 + 1) hessian_array = [] for ii in range(hessian_len): hessian_array += [eval(each) for each in outputs[flag+ii].strip().split()] #print(hessian_array) for ii in range(3*natoms): hessian[:ii+1,ii] = hessian_array[(ii+1)*ii//2:(ii+2)*(ii+1)//2] for ii in range(3*natoms): for jj in range(ii+1,3*natoms): hessian[jj,ii] = hessian[ii,jj] #print(hessian[:,-2]) hessian /= constants.Bohr2Angstrom**2 molecule.hessian = hessian
[文档] def parse_gaussian_output(filename=None, molecule=None): """ Parsing Gaussian output file and return a molecule obj :param filename: Name of Gaussian output file (e.g. benzene.log or benzene.out) :return: molecule if molecule is not provided """ if molecule is not None: mol = molecule else: mol = data.molecule() #successful = False readArchive = False chkfilename = None archive = [] forces_iatom = -4 i_read_hessian = -1 mulliken_charge_iatom = -3 read_dipole_moment = False readNMs = False ireadS = -1 es_mults = [] es_contribs = [] readES = False i_read_input_orientation = -1 i_read_standard_orientation = -1 xyz_input_orientation = [] xyz_standard_orientation = [] scf_energies = [] energy_gradients = [] # flags to check whether a property was never read before - by default overwrite everything read_already_freq = False read_already_redmass = False read_already_force_const = False read_already_ir_intens = False read_already_ES = False read_already_FB = False read_already_overtones = False read_already_comb_bands = False read_already_FBIR = False read_already_OvertonesIR = False read_already_CombinationBandsIR = False # start anharmonic properties ireadFB = -1 ireadOvertones = -1 ireadCombinationBands = -1 i_anh_thermo = -1 ireadFBIR = -1 ireadOvertonesIR = -1 ireadCombinationBandsIR = -1 FBIRError = False OvertonesIRError = False CombinationBandsIRError = False # end anharmonic properties with open(filename, 'r') as ff: for line in ff: #print(line) if ' 1\\1\\' in line or ' 1|1|' in line: readArchive = True archive = [line.rstrip()] continue if readArchive: if '\\\\@' in line or '||@' in line: readArchive = False archive.append(line.rstrip()) continue if '%chk=' in line: chkfilename = line.split('=')[-1].strip() if not '.chk' in chkfilename: chkfilename += '.chk' if 'Input orientation:' in line: i_read_input_orientation = 0 xyz_input_orientation.append([]) continue if i_read_input_orientation > -1: i_read_input_orientation += 1 if i_read_input_orientation >= 5: if '-------------------------' in line: i_read_input_orientation = -1 continue xyz_input_orientation[-1].append([float(xx) for xx in line.split()[-3:]]) continue if 'Standard orientation:' in line: i_read_standard_orientation = 0 xyz_standard_orientation.append([]) continue if i_read_standard_orientation > -1: i_read_standard_orientation += 1 if i_read_standard_orientation >= 5: if '-------------------------' in line: i_read_standard_orientation = -1 continue xyz_standard_orientation[-1].append([float(xx) for xx in line.split()[-3:]]) continue if 'SCF Done:' in line: scf_energies.append(float(line.split()[4])) if 'Forces (Hartrees/Bohr)' in line: forces_iatom = -3 energy_gradients.append([]) continue if forces_iatom > -4: forces_iatom += 1 if forces_iatom < 0: continue if '------------------' in line: forces_iatom = -4 continue energy_gradients[-1].append(-data.array([float(each) for each in line.split()[2:]]) / constants.Bohr2Angstrom) continue if 'The second derivative matrix:' in line: i_read_hessian = 0 natoms = len(mol.atoms) mol.hessian = np.zeros((3*natoms,3*natoms)) continue if i_read_hessian > -1: i_read_hessian += 1 nblocks = int((3*natoms-0.5)//5 + 1) icount = 0 for iblock in range(nblocks): for ii in range(3*natoms-5*iblock): if i_read_hessian == 1+icount+ii+1: temp = [float(each) for each in line.strip().split()[1:]] for jj in range(min(5,3*natoms-5*iblock)): if ii >= jj: mol.hessian[ii+5*iblock,jj+5*iblock] = temp[jj] icount += 3*natoms-5*iblock+1 if i_read_hessian >= icount: i_read_hessian = -1 for ii in range(3*natoms): for jj in range(ii+1,3*natoms): mol.hessian[ii,jj] = mol.hessian[jj,ii] mol.hessian = mol.hessian / constants.Bohr2Angstrom**2 continue if 'Mulliken charges:' in line: mulliken_charge_iatom = -2 continue if mulliken_charge_iatom > -3: if len(line.split()) > 3: mulliken_charge_iatom = -3 continue mulliken_charge_iatom += 1 if mulliken_charge_iatom < 0: continue if len(mol.atoms) < mulliken_charge_iatom+1: mol.atoms.append(data.atom(element_symbol=line.split()[1])) mol.atoms[mulliken_charge_iatom].mulliken_charge = float(line.split()[-1]) continue if 'Dipole moment' in line: read_dipole_moment = True continue if read_dipole_moment: xx = line.split() try: mol.dipole_moment = data.array([float(yy) for yy in [xx[1], xx[3], xx[5], xx[7]]]) except: pass read_dipole_moment = False continue if 'Frequencies --' in line: if not read_already_freq: read_already_freq = True mol.frequencies = [] mol.frequencies += [float(xx) for xx in line.split()[2:]] continue if 'Red. masses --' in line: if not read_already_redmass: read_already_redmass = True mol.reduced_masses = [] mol.reduced_masses += [float(xx) for xx in line.split()[3:]] continue if 'Frc consts --' in line: if not read_already_force_const: read_already_force_const = True mol.force_constants = [] mol.force_constants += [float(xx) for xx in line.split()[3:]] continue if 'IR Inten --' in line: if not read_already_ir_intens: read_already_ir_intens = True mol.infrared_intensities = [] mol.infrared_intensities += [float(xx) for xx in line.split()[3:]] continue if 'Atom AN X Y Z' in line: readNMs = True nm_iatom = -1 continue elif readNMs: if len(line.split()) <= 3: readNMs = False continue nm_iatom += 1 if len(mol.atoms) < nm_iatom+1: mol.atoms.append(data.atom(atomic_number=int(line.split()[1]))) if not 'normal_modes' in mol.atoms[nm_iatom].__dict__: mol.atoms[nm_iatom].normal_modes = [] xyzs = np.array([float(xx) for xx in line.split()[2:]]) nxyzs = len(xyzs) / 3 for xyz in np.array_split(xyzs, nxyzs): mol.atoms[nm_iatom].normal_modes.append(list(xyz)) if 'Zero-point correction=' in line: mol.ZPE = float(line.split()[2]) continue if 'Thermal correction to Energy=' in line: mol.DeltaE2U = float(line.split()[-1]) continue if 'Thermal correction to Enthalpy=' in line: mol.DeltaE2H = float(line.split()[-1]) continue if 'Thermal correction to Gibbs Free Energy=' in line: mol.DeltaE2G = float(line.split()[-1]) continue if 'Sum of electronic and zero-point Energies=' in line: mol.U0 = float(line.split()[-1]) mol.H0 = mol.U0 continue if 'Sum of electronic and thermal Energies=' in line: mol.U = float(line.split()[-1]) continue if 'Sum of electronic and thermal Enthalpies=' in line: mol.H = float(line.split()[-1]) continue if 'Sum of electronic and thermal Free Energies=' in line: mol.G = float(line.split()[-1]) continue if 'E (Thermal) CV S' in line: ireadS = 0 continue if ireadS > -1: ireadS +=1 if ireadS == 2: mol.S = float(line.split()[-1]) * constants.kcalpermol2Hartree / 1000.0 ireadS = -1 continue if 'Excited State' in line: xx = line.split() excitation_energy = float(xx[4]) * constants.eV2hartree ff = float(xx[8].split('=')[1]) mult = xx[3] if not read_already_ES: read_already_ES = True mol.electronic_states = [] mol.excitation_energies = [excitation_energy] mol.oscillator_strengths = [ff] else: mol.excitation_energies += [excitation_energy] mol.oscillator_strengths.append(float(xx[8].split('=')[1])) readES = True es_mults.append(xx[3]) es_contribs.append([]) continue if readES: if '->' in line: es_contribs[-1].append({'from': int(line.split('->')[0]), 'to': int(line.split('->')[1].split()[0]), 'coeff': float(line.split()[-1])}) elif '<-' in line: es_contribs[-1].append({'from': int(line.split('<-')[1].split()[0]), 'to': int(line.split('<-')[0]), 'coeff': float(line.split()[-1])}) else: readES = False continue if 'Fundamental Bands' in line and not 'anharmonic_frequencies' in mol.__dict__: read_already_FB = True ireadFB += 1 mol.anharmonic_frequencies = [] mol.harmonic_frequencies = [] continue elif 'Fundamental Bands' in line and not read_already_FB: read_already_FB = True ireadFB += 1 mol.anharmonic_frequencies = [] mol.harmonic_frequencies = [] continue if ireadFB > -1: if len(line.strip()) == 0: ireadFB = -1 continue ireadFB += 1 if ireadFB >= 3: mol.harmonic_frequencies.append(float(line[24:38])) mol.anharmonic_frequencies.append(float(line[38:48])) continue if 'Overtones' in line and not 'anharmonic_overtones' in mol.__dict__: read_already_overtones = True ireadOvertones += 1 mol.anharmonic_overtones = [] mol.harmonic_overtones = [] continue elif 'Overtones' in line and not read_already_overtones: read_already_overtones = True ireadOvertones += 1 mol.anharmonic_overtones = [] mol.harmonic_overtones = [] continue if ireadOvertones > -1: if len(line.strip()) == 0: ireadOvertones = -1 continue ireadOvertones += 1 if ireadOvertones >= 3: mol.anharmonic_overtones.append(float(line[38:48])) mol.harmonic_overtones.append(float(line[24:38].strip().split()[-1])) continue if 'Combination Bands' in line and not 'anharmonic_combination_bands' in mol.__dict__: read_already_comb_bands = True ireadCombinationBands += 1 mol.anharmonic_combination_bands = [] mol.harmonic_combination_bands = [] continue elif 'Combination Bands' in line and not read_already_comb_bands: read_already_comb_bands = True ireadCombinationBands += 1 mol.anharmonic_combination_bands = [] mol.harmonic_combination_bands = [] continue if ireadCombinationBands > -1: if len(line.strip()) == 0: ireadCombinationBands = -1 continue ireadCombinationBands += 1 if ireadCombinationBands >= 3: mol.anharmonic_combination_bands.append(float(line[38:48])) mol.harmonic_combination_bands.append(float(line[24:38].strip().split()[-1])) continue if 'ZPE(anh)' in line: mol.anharmonic_ZPE = float(line[48:61].replace('D','E')) * constants.kJpermol2Hartree if 'Input values of T(K) and P(atm):' in line: i_anh_thermo = 0 mol.temperature = float(line.strip().split()[-2]) E = mol.U - mol.DeltaE2U mol.anharmonic_U0 = E + mol.anharmonic_ZPE mol.anharmonic_H0 = E + mol.anharmonic_ZPE continue if i_anh_thermo > -1: i_anh_thermo += 1 if i_anh_thermo == 4: mol.anharmonic_DeltaE2U = float(line.strip().split()[-2].replace('D','E')) * constants.kJpermol2Hartree mol.anharmonic_U = E + mol.anharmonic_DeltaE2U continue if i_anh_thermo == 5: mol.anharmonic_DeltaE2H = float(line.strip().split()[-2].replace('D','E')) * constants.kJpermol2Hartree mol.anharmonic_H = E + mol.anharmonic_DeltaE2H continue if i_anh_thermo == 6: mol.anharmonic_S = float(line.strip().split()[-3].replace('D','E')) * constants.kJpermol2Hartree / 1000.0 mol.anharmonic_G = mol.anharmonic_H - mol.temperature * mol.anharmonic_S mol.anharmonic_DeltaE2G = mol.anharmonic_G - E i_anh_thermo = -1 continue # Read anharmonic infrared intensities if 'Fundamental Bands' in line and not 'anharmonic_infrared_intensities' in mol.__dict__: read_already_FBIR = True ireadFBIR += 1 mol.anharmonic_infrared_intensities = [] mol.harmonic_infrared_intensities = [] continue elif 'Fundamental Bands' in line and not read_already_FBIR: read_already_FBIR = True ireadFBIR += 1 mol.anharmonic_infrared_intensities = [] mol.harmonic_infrared_intensities = [] continue if ireadFBIR > -1: if len(line.strip()) == 0: ireadFBIR = -1 continue ireadFBIR += 1 if ireadFBIR == 2: if not "I(anharm)" in line: del mol.__dict__['anharmonic_infrared_intensities'] del mol.__dict__['harmonic_infrared_intensities'] ireadFBIR = -1 continue if ireadFBIR >= 3: templine = line.strip() # Sometimes anharmonic IR intensities are very large due to bad PES. Gaussian will output ********** in such cases. if "*" in templine: templine = [each for each in templine if each != '*'] templine = templine.split() try: mol.harmonic_infrared_intensities.append(float(templine[-1])) mol.anharmonic_infrared_intensities.append(np.nan) except: FBIRError = True else: templine = templine.split() try: mol.harmonic_infrared_intensities.append(float(templine[-2])) mol.anharmonic_infrared_intensities.append(float(templine[-1])) except: FBIRError = True if 'Overtones' in line and not 'anharmonic_overtones_infrared_intensities' in mol.__dict__: read_already_OvertonesIR = True ireadOvertonesIR += 1 mol.anharmonic_overtones_infrared_intensities = [] mol.harmonic_overtones_infrared_intensities = [] continue elif 'Overtones' in line and not read_already_OvertonesIR: read_already_OvertonesIR = True ireadOvertonesIR += 1 mol.anharmonic_overtones_infrared_intensities = [] mol.harmonic_overtones_infrared_intensities = [] continue if ireadOvertonesIR > -1: if len(line.strip()) == 0: ireadOvertonesIR = -1 continue ireadOvertonesIR += 1 if ireadOvertonesIR == 2: if not "I(anharm)" in line: del mol.__dict__['anharmonic_overtones_infrared_intensities'] del mol.__dict__['harmonic_overtones_infrared_intensities'] ireadOvertonesIR = -1 continue if ireadOvertonesIR >= 3: templine = line.strip() # Sometimes anharmonic IR intensities are very large due to bad PES. Gaussian will output ********** in such cases. if "*" in templine: templine = [each for each in templine if each != '*'] templine = templine.split() try: mol.harmonic_overtones_infrared_intensities.append(float(templine[-1])) mol.anharmonic_overtones_infrared_intensities.append(np.nan) except: OvertonesIRError = True else: templine = templine.split() try: mol.harmonic_overtones_infrared_intensities.append(float(templine[-2])) mol.anharmonic_overtones_infrared_intensities.append(float(templine[-1])) except: OvertonesIRError = True if 'Combination Bands' in line and not 'anharmonic_combination_bands_infrared_intensities' in mol.__dict__: read_already_CombinationBandsIR = True ireadCombinationBandsIR += 1 mol.anharmonic_combination_bands_infrared_intensities = [] mol.harmonic_combination_bands_infrared_intensities = [] continue elif 'Combination Bands' in line and not read_already_CombinationBandsIR: read_already_CombinationBandsIR = True ireadCombinationBandsIR += 1 mol.anharmonic_combination_bands_infrared_intensities = [] mol.harmonic_combination_bands_infrared_intensities = [] continue if ireadCombinationBandsIR > -1: if len(line.strip()) == 0: ireadCombinationBandsIR = -1 continue ireadCombinationBandsIR += 1 if ireadCombinationBandsIR == 2: if not "I(anharm)" in line: del mol.__dict__['anharmonic_combination_bands_infrared_intensities'] del mol.__dict__['harmonic_combination_bands_infrared_intensities'] ireadCombinationBandsIR = -1 continue if ireadCombinationBandsIR >= 3: templine = line.strip() # Sometimes anharmonic IR intensities are very large due to bad PES. Gaussian will output ********** in such cases. if "*" in templine: templine = [each for each in templine if each != '*'] templine = templine.split() try: mol.harmonic_combination_bands_infrared_intensities.append(float(templine[-1])) mol.anharmonic_combination_bands_infrared_intensities.append(np.nan) except: CombinationBandsIRError = True else: templine = templine.split() try: mol.harmonic_combination_bands_infrared_intensities.append(float(templine[-2])) mol.anharmonic_combination_bands_infrared_intensities.append(float(templine[-1])) except: CombinationBandsIRError = True #if 'Normal termination of Gaussian' in line: # successful = True archiveText = r'' Nlines = 0 for line in archive: Nlines += 1 archiveText += line.strip() if 'NImag=' in archiveText: if line[-len('NImag='):] != 'NImag=': break if '\\\\@' in archiveText: break optjob = False if archiveText != r'': method_names = ['HF', 'MP2', 'MP3', 'MP4D', 'MP4DQ', 'MP4SDQ', 'MP4SDTQ', 'MP5', 'CISD', 'QCISD', 'QCISD(T)', 'CCSD', 'CCSD(T)'] method_energies = [] archiveTextSplit = archiveText.split('\\\\') # here it splits with '\\' as a delimiter if 'opt'.casefold() in archiveTextSplit[0].casefold(): optjob = True if 'freq'.casefold() in archiveTextSplit[1].casefold() and chkfilename is not None: dir_path = os.path.dirname(os.path.realpath(filename)) if os.path.exists(f'{dir_path}/{chkfilename}'): read_Hessian_matrix_from_Gaussian_chkfile(f'{dir_path}/{chkfilename}', mol) coords = archiveTextSplit[3] coords = coords.split('\\') # here it splits with '\' as a delimiter charge, mult = (int(xx) for xx in coords[0].split(',')) mol.charge = charge mol.multiplicity = mult coords = coords[1:] if mol.atoms == []: for coord in coords: xx = coord.split(',') if len(xx[1:]) > 3: del xx[1] mol.atoms.append(data.atom(element_symbol=xx[0], xyz_coordinates=data.array([float(yy) for yy in xx[1:]]))) else: for idx, coord in enumerate(coords): xx = coord.split(',') mol.atoms[idx].element_symbol = xx[0] if len(xx[1:]) > 3: del xx[1] mol.atoms[idx].xyz_coordinates = data.array([float(yy) for yy in xx[1:]]) for xx in archiveTextSplit[4].split('\\'): method_flag = False for name in method_names: if f'{name}=' in xx: yy = xx.split('=') method_energies.append((yy[0], float(yy[-1]))) method_flag = True break if method_flag: continue if 'DipoleDeriv' in xx: yy = xx.split('=')[-1].strip() # to-do: reshape, make sure the same as in Yifan's implementation before with 'DipoleDeriv ' (note space!) mol.dipole_derivatives = data.array([float(zz) for zz in yy.split(',')]) / constants.Debye # do we need to divide by Debye? continue if len(method_energies) > 1: method_children = [] for method in method_energies: method_children.append(data.properties_tree_node(name=method[0])) method_children[-1].energy = method[1] mol.__dict__[method[0]] = method_children[-1] mol.energy_levels = data.properties_tree_node(name='energy_levels',children=method_children) mol.energy = method_energies[-1][1] # in old implementation it was wrong: mol.HF = energy, now it is mol.HF.energy = energy elif len(method_energies) == 1: mol.energy = method_energies[0][1] if len(mol.excitation_energies) > 0: mol.excitation_energies = data.array(mol.excitation_energies) mol.electronic_states = [mol.copy(atomic_labels=[], molecular_labels=[]) for ii in range(mol.nstates)] for istate, mol_state in enumerate(mol.electronic_states): if istate == 0: mol_state.energy = mol.energy continue mol_state.energy = mol.energy + mol.excitation_energies[istate-1] if 'Singlet' in es_mults[istate-1]: mol_state.multiplicity = 1 elif 'Doublet' in es_mults[istate-1]: mol_state.multiplicity = 2 elif 'Triplet' in es_mults[istate-1]: mol_state.multiplicity = 3 elif 'Quartet' in es_mults[istate-1]: mol_state.multiplicity = 4 mol_state.mo_contributions = es_contribs[istate-1] if 'oscillator_strengths' in mol.__dict__: if len(mol.oscillator_strengths) > 0: mol.oscillator_strengths = data.array(mol.oscillator_strengths) if 'anharmonic_frequencies' in mol.__dict__.keys(): # by default, we should get the anharmonic properties if they are available order = np.argsort(mol.harmonic_frequencies) mol.harmonic_frequencies = [mol.harmonic_frequencies[ii] for ii in order] mol.anharmonic_frequencies = [mol.anharmonic_frequencies[ii] for ii in order] mol.frequencies = mol.anharmonic_frequencies # for some reason Gaussian prints twice the same analysis of harmonic frequencies, so we need to delete the duplicated information freq_len = len(mol.frequencies) if 'anharmonic_infrared_intensities' in mol.__dict__.keys(): mol.anharmonic_infrared_intensities = [mol.anharmonic_infrared_intensities[ii] for ii in order] mol.harmonic_infrared_intensities = np.copy(mol.infrared_intensities[:freq_len]) mol.infrared_intensities = mol.anharmonic_infrared_intensities else: # If anharmonic IR intensities are not calculated, harmonic IR intensities will be read twice if len(mol.frequencies) != len(mol.infrared_intensities): mol.infrared_intensities = mol.infrared_intensities[:len(mol.frequencies)] #mol.frequencies = mol.frequencies[:freq_len] #mol.infrared_intensities = mol.infrared_intensities[:freq_len] # to-do check whether it is correct length now mol.force_constants = mol.force_constants[:freq_len] mol.reduced_masses = mol.reduced_masses[:freq_len] for iatom in range(len(mol.atoms)): mol.atoms[iatom].normal_modes = mol.atoms[iatom].normal_modes[:freq_len] thermochemistry_properties = ['ZPE','DeltaE2U','DeltaE2H','DeltaE2G','U0','H0','U','H','G','S'] for each_property in thermochemistry_properties: mol.__dict__['harmonic_'+each_property] = mol.__dict__[each_property] mol.__dict__[each_property] = mol.__dict__['anharmonic_'+each_property] if FBIRError: print("Warning! An error occurred when reading anharmonic IR intensities.") if OvertonesIRError: print("Warning! An error occurred when reading anharmonic overtones IR intensities.") if CombinationBandsIRError: print("Warning! An error occurred when reading anharmonic combination bands IR intensities.") if 'force_constants' in mol.__dict__.keys(): mol.force_constants = data.array(mol.force_constants) if 'reduced_masses' in mol.__dict__.keys(): mol.reduced_masses = data.array(mol.reduced_masses) for atom in mol.atoms: if 'normal_modes' in atom.__dict__.keys(): atom.normal_modes = data.array(atom.normal_modes) if 'infrared_intensities' in mol.__dict__.keys(): if np.all(mol.infrared_intensities == 0): del mol.__dict__['infrared_intensities'] # delete the last geom if there were fewer SCF energies than geometries # (can be due to failed SCF calculations or duplicated printing after successful geom opt) if len(scf_energies) < len(xyz_input_orientation) and len(xyz_input_orientation) > 1: if np.max(np.abs(data.array(xyz_input_orientation[-1]) - data.array(xyz_input_orientation[-2]))) < 1e-6: del xyz_input_orientation[-1] if len(scf_energies) < len(xyz_standard_orientation) and len(xyz_standard_orientation) > 1: if np.max(np.abs(data.array(xyz_standard_orientation[-1]) - data.array(xyz_standard_orientation[-2]))) < 1e-6: del xyz_standard_orientation[-1] # if it was opt freq job, then geometries and scf energies are duplicated at the end too if len(scf_energies) > 1: if len(scf_energies) == len(xyz_input_orientation) and len(scf_energies) == len(xyz_standard_orientation): if abs(scf_energies[-2] - scf_energies[-1]) < 1e-6 and np.max(np.abs(data.array(xyz_input_orientation[-1]) - data.array(xyz_input_orientation[-2]))) < 1e-6: del scf_energies[-1] del xyz_input_orientation[-1] del xyz_standard_orientation[-1] if len(energy_gradients) > len(scf_energies): if np.max(np.abs(data.array(energy_gradients[-1]) - data.array(energy_gradients[-2]))) < 1e-6: del energy_gradients[-1] elif len(scf_energies) == len(xyz_input_orientation): if abs(scf_energies[-2] - scf_energies[-1]) < 1e-6 and np.max(np.abs(data.array(xyz_input_orientation[-1]) - data.array(xyz_input_orientation[-2]))) < 1e-6: del scf_energies[-1] del xyz_input_orientation[-1] if len(energy_gradients) > len(scf_energies): if np.max(np.abs(data.array(energy_gradients[-1]) - data.array(energy_gradients[-2]))) < 1e-6: del energy_gradients[-1] elif len(scf_energies) == len(xyz_standard_orientation): if abs(scf_energies[-2] - scf_energies[-1]) < 1e-6 and np.max(np.abs(data.array(xyz_standard_orientation[-1]) - data.array(xyz_standard_orientation[-2]))) < 1e-6: del scf_energies[-1] del xyz_standard_orientation[-1] if len(energy_gradients) > len(scf_energies): if np.max(np.abs(data.array(energy_gradients[-1]) - data.array(energy_gradients[-2]))) < 1e-6: del energy_gradients[-1] # get molecular database for multiple single-point calculations in the output file if len(scf_energies) > 1: db = data.molecular_database() db.molecules = [mol.copy(atomic_labels=[], molecular_labels=[]) for ii in range(len(scf_energies))] for iscf in range(len(scf_energies)): db.molecules[iscf].energy = scf_energies[iscf] if len(scf_energies) == len(xyz_input_orientation): for iscf in range(len(scf_energies)): db.molecules[iscf].xyz_coordinates = data.array(xyz_input_orientation[iscf]) # to-do: reorient to input orientation using mol.xyz_coordinates which are already in input orientation for the last geometry #elif len(scf_energies) == len(xyz_standard_orientation) # for iscf in range(len(scf_energies)): # db.molecules[iscf].xyz_coordinates = data.array(xyz_standard_orientation[iscf]) if len(scf_energies) == len(energy_gradients): for iscf in range(len(scf_energies)): db.molecules[iscf].energy_gradients = data.array(energy_gradients[iscf]) mol.molecular_database = db if optjob: moltraj = data.molecular_trajectory() for istep, mol_step in enumerate(db): moltraj.steps.append(data.molecular_trajectory_step(step=istep, molecule=mol_step)) mol.optimization_trajectory = moltraj # get energy gradients from the last place they are calculated if len(energy_gradients) > 0: mol.energy_gradients = data.array(energy_gradients[-1]) if 'molecular_database' in mol.__dict__.keys(): if np.max(np.abs(db[-1].xyz_coordinates - mol.xyz_coordinates)) < 1e-6: tmpmol = mol.copy() del tmpmol.__dict__['molecular_database'] mol.molecular_database.molecules[-1] = tmpmol if 'optimization_trajectory' in tmpmol.__dict__.keys(): del tmpmol.__dict__['optimization_trajectory'] mol.optimization_trajectory.steps[-1].molecule = tmpmol if molecule is None: return mol
if __name__ == '__main__': model_predict_kwargs_str_file, _, EIn_file, EOu_file, _, _, _ = sys.argv[1:] with open(model_predict_kwargs_str_file) as f: model_predict_kwargs = eval(f.read()) gaussian_external(EIn_file, EOu_file, model_predict_kwargs)