mlatom.interfaces.orca_interface 源代码

#!/usr/bin/env python3
'''
.. code-block::

  !---------------------------------------------------------------------------! 
  ! orca: interface to the ORCA program                                       ! 
  ! Implementations by: Yuxinxin Chen                                         !
  !---------------------------------------------------------------------------! 
'''

import os
import numpy as np
import math
import tempfile, subprocess
from .. import constants, models
from ..decorators import doc_inherit

[文档] class orca_methods(models.model, metaclass=models.meta_method): ''' ORCA interface Arguments: method (str): method used the same as in ORCA, e.g. ``'B3LYP/6-31G*'`` (case insensitive). This is the first line in orca input and you can also store the whole first line here. save_files_in_current_directory (bool): whether to keep input and output files, default ``'False'`` working_directory (str): path to the directory where the program output files and other tempory files are saved, default ``'None'`` nthreads (int): equivalent to %pal nprocs in ORCA input file nthreads_list (list): a list of number of nthreads used in CCSD(T)*/CBS method. The order should be [mp2_tz, mp2_qz, dlpno_normal_dz, dlpno_normal_tz, dlpno_tight_dz] additional_keywords (list): list of keywords(str) to be added to orca input (first line) input_file (str): name of your orca input file. Default will use "[molecule number]_" output_keywords (list): list of keywords that you want to extract from output file. (Currently customized keywords are only supported in energy calculation and in property.txt file) .. note:: When using CCSD(T)*/CBS for calculation, please make sure the ``'nthreads'`` you used for each method will not cause memory exceeding. We suggest using ``'nthreads_list'`` to properly set ``'nthreads'`` for each component method in the order: [MP2/cc-pVTZ, MP2/cc-pVQZ, DLPNO-CCSD(T)-normalPNO/cc-pVDZ, DLPNO-CCSD(T)-normalPNO/cc-pVTZ, DLPNO-CCSD(T)-tightPNO/cc-pVTZ] If only ``'nthreads'`` is set, all component methods would use the same number of threads. ''' def __init__(self, method='wb97x/6-31G*', **kwargs): if not "orcabin" in os.environ: raise ValueError('enviromental variable orcabin is not set') else: self.orcabin = os.environ['orcabin'] self.method = method self.orca_successful = True if self.method == 'CCSD(T)*/CBS': self.cc_method = ccsdtstarcbs(**kwargs) if 'nthreads' in kwargs: self.nthreads = kwargs['nthreads'] else: self.nthreads = 1 if 'maxcore' in kwargs: self.maxcore = kwargs['maxcore'] else: self.maxcore = 1000 if 'additional_keywords' in kwargs: self.additional_keywords = kwargs['additional_keywords'] else: self.additional_keywords = None if 'save_files_in_current_directory' in kwargs: self.save_files_in_current_directory = kwargs['save_files_in_current_directory'] else: self.save_files_in_current_directory = True if 'working_directory' in kwargs: self.working_directory = kwargs['working_directory'] else: self.working_directory = None if 'input_file' in kwargs: self.input_file = kwargs['input_file'] else: self.input_file = '' if 'output_keywords' in kwargs: self.output_keywords = kwargs['output_keywords'] else: self.output_keywords = None
[文档] @doc_inherit def predict(self, molecular_database=None, molecule=None, calculate_energy=True, calculate_energy_gradients=False, calculate_hessian=False, **kwargs): ''' **kwargs: ``# needs to be documented``. ''' molDB = super().predict(molecular_database=molecular_database, molecule=molecule) if self.method == 'CCSD(T)*/CBS': self.cc_method.predict(molecular_database=molDB) else: self.calculate_energy = calculate_energy self.calculate_energy_gradients = calculate_energy_gradients self.calculate_hessian = calculate_hessian with tempfile.TemporaryDirectory() as tmpdirname: if self.save_files_in_current_directory: tmpdirname = '.' if self.working_directory is not None: tmpdirname = self.working_directory if not os.path.exists(tmpdirname): os.makedirs(tmpdirname) tmpdirname = os.path.abspath(tmpdirname) for imol in range(len(molDB.molecules)): imolecule = molDB.molecules[imol] self.inpfile = f'{tmpdirname}/molecule{imol}_' + self.input_file # calculation self.generate_orca_inp(molecule=imolecule) # run orca orcaargs = [self.orcabin, f'{self.inpfile}.inp', '>', f'{self.inpfile}.out'] proc = subprocess.Popen(' '.join(orcaargs), stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=tmpdirname, universal_newlines=True, shell=True) proc.communicate() # read output self.parse_orca_output(molecule=imolecule)
def parse_orca_output(self, molecule): natom = len(molecule.atoms) if self.calculate_energy: if not self.calculate_energy_gradients: with open(f'{self.inpfile}_property.txt', 'r') as orcaout: orcaout_lines = orcaout.readlines() for ii in range(len(orcaout_lines)): if 'SCF Energy' in orcaout_lines[ii]: # ? check SCf energy or total enenrgy molecule.energy = float(orcaout_lines[ii].split()[-1]) else: with open(f'{self.inpfile}.engrad', 'r') as orcaout: orcaout_lines = orcaout.readlines() for ii in range(len(orcaout_lines)): if 'total energy' in orcaout_lines[ii]: molecule.energy = float(orcaout_lines[ii+2]) if self.calculate_energy_gradients: with open(f'{self.inpfile}.engrad', 'r') as orcaout: orcaout_lines = orcaout.readlines() for ii in range(len(orcaout_lines)): if 'gradient' in orcaout_lines[ii]: grad = orcaout_lines[ii+2: ii+2+natom*3] grad = [float(g.split()[0])/constants.Bohr2Angstrom for g in grad] for ii in range(natom): a = molecule.atoms[ii] a.energy_gradients = grad[3*ii: 3*ii+3] if self.calculate_hessian: with open(f'{self.inpfile}.hess', 'r') as orcaout: orcaout_lines = orcaout.readlines() hessian_index = orcaout_lines.index('$hessian\n') # start from $hessian ncoordinate = int(orcaout_lines[hessian_index+1]) # second line is #coordinates ncircle = int((ncoordinate-0.5) / 5)+1 # blocks hessian_matrix = np.zeros((ncoordinate, ncoordinate)) for ii in range(ncircle): start_index = hessian_index+2+(ncoordinate+1)*ii cols_index_list = orcaout_lines[start_index].split() for jj in range(ncoordinate): hessian_line = orcaout_lines[start_index+1+jj].split() for kk in range(len(cols_index_list)): col_index = cols_index_list[kk] hessian_matrix[int(hessian_line[0])][int(col_index)] = float(hessian_line[kk+1])/constants.Bohr2Angstrom**2 molecule.hessian = hessian_matrix if not self.calculate_energy and self.output_keywords: for keyword in self.output_keywords: keyword_name = self.input_file+'_'+keyword.lower().replace(' ', '_') with open(f'{self.inpfile}_property.txt', 'r') as orcaout: orcaout_lines = orcaout.readlines() for ii in range(len(orcaout_lines)): if keyword in orcaout_lines[ii]: molecule.__dict__[keyword_name] = float(orcaout_lines[ii].split()[-1]) def generate_orca_inp(self, molecule, **kwargs): with open(f'{self.inpfile}.inp', 'w') as forca: #keywords if '/' in self.method and len(self.method.split('/'))==2: method_to_write = ' '.join(self.method.split('/')) else: method_to_write = self.method if self.additional_keywords: method_to_write += ' '.join(self.additional_keywords) forca.writelines(f'! {method_to_write}\n') # decide calculation type if self.calculate_energy: calculation_type = [] calculation_type += ['SP'] if self.calculate_energy_gradients: calculation_type += ['ENGRAD'] if self.calculate_hessian: calculation_type += ['FREQ'] calculation_type = ' '.join(calculation_type) forca.writelines(f'! {calculation_type}\n') forca.writelines(f'%pal nprocs {self.nthreads} end\n') forca.writelines(f'%maxcore {self.maxcore}\n') #structure forca.writelines(f'* xyz {molecule.charge} {molecule.multiplicity}\n') for atom in molecule.atoms: line = '%s %12.8f %12.8f %12.8f\n' % ( atom.element_symbol, atom.xyz_coordinates[0], atom.xyz_coordinates[1], atom.xyz_coordinates[2]) forca.writelines(line) forca.writelines('*')
[文档] class ccsdtstarcbs(models.model): def __init__(self, **kwargs): if 'nthreads_list' in kwargs: self.nthreads_list = kwargs['nthreads_list'] if len(self.nthreads_list) != 5: print('Number of values to asign to each methods should be 5. Default nthreads will be used') self.nthreads_list = None else: self.nthreads_list = None self.successful = True if self.nthreads_list: kwargs.update({'nthreads':self.nthreads_list[0], 'input_file':'mp2_tz', 'output_keywords':['SCF Energy', 'Correlation Energy']}) else: kwargs.update({'input_file':'mp2_tz', 'output_keywords':['SCF Energy', 'Correlation Energy']}) self.mp2_tz = orca_methods(method='''RIMP2 RIJK cc-pVTZ cc-pVTZ/JK cc-pVTZ/C\n! tightscf noautostart scfconvforced miniprint nopop''', **kwargs) if self.nthreads_list: kwargs.update({'nthreads':self.nthreads_list[1], 'input_file':'mp2_qz', 'output_keywords':['SCF Energy', 'Correlation Energy']}) else: kwargs.update({'input_file':'mp2_qz', 'output_keywords':['SCF Energy', 'Correlation Energy']}) self.mp2_qz = orca_methods(method='''RIMP2 RIJK cc-pVQZ cc-pVQZ/JK cc-pVQZ/C\n! tightscf noautostart scfconvforced miniprint nopop''', **kwargs) if self.nthreads_list: kwargs.update({'nthreads':self.nthreads_list[2],'input_file':'dlpno_normal_dz', 'output_keywords':['Total Correlation Energy']}) else: kwargs.update({'input_file':'dlpno_normal_dz', 'output_keywords':['Total Correlation Energy']}) self.npno_dz = orca_methods(method='''DLPNO-CCSD(T) normalPNO RIJK cc-pVDZ cc-pVDZ/C cc-pvTZ/JK\n! tightscf noautostart scfconvforced miniprint nopop''', **kwargs) if self.nthreads_list: kwargs.update({'nthreads':self.nthreads_list[3], 'input_file':'dlpno_normal_tz','output_keywords':['Total Correlation Energy']}) else: kwargs.update({'input_file':'dlpno_normal_tz','output_keywords':['Total Correlation Energy']}) self.npno_tz = orca_methods(method='''DLPNO-CCSD(T) normalPNO RIJK cc-pVTZ cc-pVTZ/C cc-pVTZ/JK\n! tightscf noautostart scfconvforced miniprint nopop''', **kwargs) if self.nthreads_list: kwargs.update({'nthreads':self.nthreads_list[4], 'input_file':'dlpno_tight_dz','output_keywords':['Total Correlation Energy']}) else: kwargs.update({'input_file':'dlpno_tight_dz','output_keywords':['Total Correlation Energy']}) self.tpno_dz = orca_methods(method='''DLPNO-CCSD(T) tightPNO RIJK cc-pVDZ cc-pVDZ/C cc-pVTZ/JK\n! tightscf noautostart scfconvforced miniprint nopop''', **kwargs) self.combination = [self.mp2_tz, self.mp2_qz, self.npno_dz, self.npno_tz, self.tpno_dz]
[文档] def predict(self, molecular_database, **kwargs): for method in self.combination: method.predict(molecular_database=molecular_database, calculate_energy=False) for mol in molecular_database: alpha = 5.46; beta = 2.51 hf_tz = mol.mp2_tz_scf_energy hf_qz = mol.mp2_qz_scf_energy mp2_tz = mol.mp2_tz_correlation_energy mp2_qz = mol.mp2_qz_correlation_energy npno_dz = mol.dlpno_normal_dz_total_correlation_energy npno_tz = mol.dlpno_normal_tz_total_correlation_energy tpno_dz = mol.dlpno_tight_dz_total_correlation_energy E_hf_xtrap = (math.exp(-alpha * 4**0.5) * hf_tz - math.exp(-alpha * 3**0.5) * hf_qz) \ / (math.exp(-alpha * 4**0.5) - math.exp(-alpha * 3**0.5)) E_mp2_xtrap = (4**beta * mp2_qz - 3**beta * mp2_tz) / (4**beta - 3**beta) energy = E_hf_xtrap + E_mp2_xtrap - mp2_tz + npno_tz + tpno_dz - npno_dz mol.energy = energy