'''
.. code-block::
!---------------------------------------------------------------------------!
! Interface_DeePMDkit: Interface between DeePMD-kit and MLatom !
! Implementations by: Fuchun Ge !
!---------------------------------------------------------------------------!
'''
from __future__ import annotations
from typing import Any, Union, Dict
import os, sys, uuid, tempfile, subprocess, json
import numpy as np
from .. import data
from ..model_cls import ml_model, tensorflow_model, hyperparameter, hyperparameters
from ..decorators import doc_inherit
if 'DeePMDkit' in os.environ:
DeePMDdir = os.environ['DeePMDkit']
DeePMDbin = DeePMDdir+'/dp'
else:
print('please set $DeePMDkit')
DeePMDbin = ''
def molDB2DPdata(dir_name, molecular_database, validation_molecular_database, type_map=None,
property_to_learn=None,
xyz_derivative_property_to_learn=None):
if os.path.exists(dir_name):
if not os.path.isdir(dir_name):
print(f'file {dir_name} already exists')
return
else:
os.mkdir(dir_name)
if os.path.exists(f'{dir_name}/set.000'):
print(f'{dir_name}/set.000 already exists')
return
else:
os.mkdir(f'{dir_name}/set.000')
if os.path.exists(f'{dir_name}/set.001'):
print(f'{dir_name}/set.001 already exists')
return
else:
os.mkdir(f'{dir_name}/set.001')
if type_map is None:
# type_map = list(np.sort(np.unique(molDB.get_element_symbols())))
type_map = list(sorted(set(molecular_database[0].get_element_symbols()), key=lambda x: list(molDB[0].get_element_symbols()).index(x)))
with open(f'{dir_name}/type.raw', 'w') as f:
for element in molecular_database[0].get_element_symbols():
f.write("%d " % type_map.index(element))
np.save(f'{dir_name}/set.000/box.npy', np.repeat(100 * np.eye(3).reshape([1, -1]), len(molecular_database), axis=0))
np.save(f'{dir_name}/set.001/box.npy', np.repeat(100 * np.eye(3).reshape([1, -1]), len(validation_molecular_database), axis=0))
np.save(f'{dir_name}/set.000/coord.npy', molecular_database.xyz_coordinates.reshape(len(molecular_database), -1))
np.save(f'{dir_name}/set.001/coord.npy', validation_molecular_database.xyz_coordinates.reshape(len(validation_molecular_database), -1))
if property_to_learn:
np.save(f'{dir_name}/set.000/energy.npy', molecular_database.get_properties(property_to_learn))
np.save(f'{dir_name}/set.001/energy.npy', validation_molecular_database.get_properties(property_to_learn))
if xyz_derivative_property_to_learn:
np.save(f'{dir_name}/set.000/force.npy', -1 * molecular_database.get_xyz_vectorial_properties(xyz_derivative_property_to_learn).reshape(len(molecular_database), -1))
np.save(f'{dir_name}/set.001/force.npy', -1 * validation_molecular_database.get_xyz_vectorial_properties(xyz_derivative_property_to_learn).reshape(len(validation_molecular_database), -1))
[文档]
class dpmd(ml_model, tensorflow_model):
'''
Create an `DeepPot-SE <https://proceedings.neurips.cc/paper_files/paper/2018/hash/e2ad76f2326fbc6b56a45a56c59fafdb-Abstract.html>`_ model object.
Interfaces to `DeepMD-kit <https://doi.org/10.1016/j.cpc.2018.03.016>`_.
Arguments:
model_file (str, optional): The filename that the model to be saved with or loaded from.
hyperparameters (Dict[str, Any] | :class:`mlatom.hyperparameters`, optional): Updates the hyperparameters of the model with provided.
verbose (int, optional): 0 for silence, 1 for verbosity.
'''
hyperparameters = hyperparameters({
'early_stopping': hyperparameter(value=True, choices=[True, False], optimization_space='choices', dtype=bool),
'early_stopping_paticence': hyperparameter(value=60, minval=16, maxval=256, optimization_space='linear', dtype=int),
'early_stopping_threshold': hyperparameter(value=0.0000, minval=-0.0001, maxval=0.0001, optimization_space='linear', dtype=float),
'batch_size': hyperparameter(value=4, minval=1, maxval=1024, optimization_space='linear', dtype=int),
'disp_freq': hyperparameter(value=100, minval=10, maxval=10000, optimization_space='linear', dtype=int),
'stop_batch': hyperparameter(value=400000, minval=100, maxval=1000000, optimization_space='linear', dtype=int),
"start_lr": hyperparameter(value=0.005, minval=0.0001, maxval=0.01, optimization_space='log', dtype=float),
"decay_steps": hyperparameter(value=2000, minval=100, maxval=10000, optimization_space='log', dtype=int),
"decay_rate": hyperparameter(value=0.95, minval=0.5, maxval=1, optimization_space='log', dtype=float),
"start_pref_e": hyperparameter(value=0.02, minval=0, maxval=100, optimization_space='log', dtype=float),
"limit_pref_e": hyperparameter(value=2, minval=0, maxval=1000, optimization_space='log', dtype=float),
"start_pref_f": hyperparameter(value=1000, minval=0, maxval=1000, optimization_space='log', dtype=float),
"limit_pref_f": hyperparameter(value=1, minval=0, maxval=1000, optimization_space='log', dtype=float),
"start_pref_v": hyperparameter(value=0, minval=0, maxval=1000, optimization_space='log', dtype=float),
"limit_pref_v": hyperparameter(value=0, minval=0, maxval=1000, optimization_space='log', dtype=float),
'rcut': hyperparameter(value=6.0, minval=1.0, maxval=10.0, optimization_space='linear', dtype=float),
'neuron': hyperparameter('30,60'),
'n_neuron': hyperparameter('80,80,80'),
'sel': hyperparameter(value=16, minval=1, maxval=32, optimization_space='linear', dtype=str),
})
json = {
"model": {
"descriptor": {
"type": "se_a",
"sel": [
16,
16,
16
],
"rcut_smth": 6.3,
"rcut": 6.5,
"neuron": [
30,
60
],
"resnet_dt": False,
"axis_neuron": 6,
"seed": 42
},
"fitting_net": {
"n_neuron": [
80,
80,
80
],
"resnet_dt": True,
"seed": 42
},
"type_map": [
"C",
"H",
"O"
]
},
"learning_rate": {
"type": "exp",
"start_lr": 0.005,
"decay_steps": 2000,
"decay_rate": 0.95
},
"loss": {
"start_pref_e": 0.02,
"limit_pref_e": 2,
"start_pref_f": 1000,
"limit_pref_f": 1,
"start_pref_v": 0,
"limit_pref_v": 0
},
"training": {
"systems": [
"./"
],
"set_prefix": "set",
"stop_batch": 400000,
"batch_size": 4,
"seed": 42,
"disp_file": "lcurve.out",
"disp_freq": 100,
"numb_test": 10,
"save_freq": 100,
"save_ckpt": "model.ckpt",
"load_ckpt": "model.ckpt",
"disp_training": True,
"time_training": True,
"profiling": False,
"profiling_file": "timeline.json"
}
}
type_map = []
json_input = None
property_name = 'y'
program = 'DeepMD-kit'
meta_data = {
"genre": "neural network"
}
model_file = None
model = None
verbose = False
def __init__(self, model_file=None, hyperparameters={}, verbose=1,):
self.hyperparameters = self.hyperparameters.copy()
self.hyperparameters.update(hyperparameters)
self.verbose = verbose
if model_file:
self.model_file = model_file
if os.path.exists(self.model_file):
if os.path.exists(f"{self.model_file}.json"):
with open(f"{self.model_file}.json") as f:
self.json = json.load(f)
self.type_map = self.json["model"]["type_map"]
else:
self.model_file = f'DPMD_{str(uuid.uuid4())}.pb'
def parse_args(self, args):
super().parse_args(args)
for hyperparam in self.hyperparameters:
if hyperparam in args.hyperparameter_optimization['hyperparameters']:
self.parse_hyperparameter_optimization(args, hyperparam)
# elif hyperparam in args.data:
# self.hyperparameters[hyperparam].value = args.data[hyperparam]
elif 'deepmd' in args.data and hyperparam in args.deepmd.data:
self.hyperparameters[hyperparam].value = args.deepmd.data[hyperparam]
if 'deepmd' in args.data:
if 'input' in args.deepmd.data:
self.update_json(json_input=args.deepmd.input)
def update_json(self, **kwargs):
if 'json_input' in kwargs and kwargs['json_input']:
with open(kwargs['json_input'], 'r') as f:
self.json.update(json.load(f))
self.json['input'] = kwargs['json_input']
else:
self.json["model"]["type_map"] = self.type_map
if len(self.json['model']['descriptor']['sel']) != len(self.type_map):
self.json['model']['descriptor']['sel'] = [self.json['model']['descriptor']['sel'][0]] * len(self.type_map)
self.json['training']['batch_size'] = kwargs['batch_size'].value
self.json['training']['stop_batch'] = kwargs['stop_batch'].value
self.json['training']['disp_freq'] = kwargs['disp_freq'].value
self.json['training']['save_freq'] = kwargs['disp_freq'].value
self.json['learning_rate']["start_lr"] = kwargs["start_lr"].value
self.json['learning_rate']["decay_steps"] = kwargs["decay_steps"].value
self.json['learning_rate']["decay_rate"] = kwargs["decay_rate"].value
self.json['loss']["start_pref_e"] = kwargs["start_pref_e"].value
self.json['loss']["limit_pref_e"] = kwargs["limit_pref_e"].value
self.json['loss']["start_pref_f"] = kwargs["start_pref_f"].value
self.json['loss']["limit_pref_f"] = kwargs["limit_pref_f"].value
self.json['loss']["start_pref_v"] = kwargs["start_pref_v"].value
self.json['loss']["limit_pref_v"] = kwargs["limit_pref_v"].value
if 'property_to_learn' in kwargs and not kwargs['property_to_learn']:
self.json['loss']['limit_pref_e'] = 0
self.json['loss']['start_pref_e'] = 0
if 'xyz_derivative_property_to_learn' in kwargs and not kwargs['xyz_derivative_property_to_learn']:
self.json['loss']['limit_pref_f'] = 0
self.json['loss']['start_pref_f'] = 0
self.json['model']["descriptor"]['sel'] = [int(i) for i in kwargs["sel"].value.strip('[]').split(',')]
if len(self.json['model']["descriptor"]['sel']) == 1:
self.json['model']["descriptor"]['sel'] = self.json['model']["descriptor"]['sel'] * len(self.json["model"]["type_map"])
self.json['model']["descriptor"]['rcut'] = kwargs["rcut"].value
self.json['model']["descriptor"]['neuron'] = [int(i) for i in kwargs["neuron"].value.strip('[]').split(',')]
self.json['model']["fitting_net"]['n_neuron'] = [int(i) for i in kwargs["n_neuron"].value.strip('[]').split(',')]
self.type_map = self.json["model"]["type_map"]
self.json['training']['set_prefix'] = 'set'
if 'dirname' in kwargs:
self.json['training']['systems'] = kwargs['dirname']
with open(f"{self.model_file}.json", "w") as f:
json.dump(self.json, f, indent=4)
[文档]
def train(
self,
molecular_database: data.molecular_database,
property_to_learn: str = 'energy',
xyz_derivative_property_to_learn: str = None,
validation_molecular_database: Union[data.molecular_database, str, None] = 'sample_from_molecular_database',
hyperparameters: Union[Dict[str,Any], hyperparameters] = {},
spliting_ratio=0.8,
stdout=None,
stderr=None,
json_input=None,
dirname=None,
):
self.hyperparameters.update(hyperparameters)
FNULL = open(os.devnull, 'w')
if not self.verbose:
if not stdout:
stdout = FNULL
if not stderr:
stderr = FNULL
else:
stdout = sys.stdout
stderr = sys.stderr
if os.path.exists(self.model_file):
print(f'{self.model_file} exists')
self.model_file = f'DPMD_{str(uuid.uuid4())}.pb'
if self.verbose:
print(f'Trained DPMD model will be saved in {self.model_file}.')
if dirname:
rundir = dirname
else:
tmpdir = tempfile.TemporaryDirectory()
rundir = tmpdir.name
if not self.type_map:
self.type_map = list(sorted(set(molecular_database[0].get_element_symbols()), key=lambda x: list(molecular_database[0].get_element_symbols()).index(x)))
self.update_json(**self.hyperparameters,
json_input=json_input if json_input else self.json_input,
dirname=rundir,
property_to_learn=property_to_learn,
xyz_derivative_property_to_learn=xyz_derivative_property_to_learn)
if validation_molecular_database == 'sample_from_molecular_database':
idx = np.arange(len(molecular_database))
np.random.shuffle(idx)
molecular_database, validation_molecular_database = [molecular_database[i_split] for i_split in np.split(idx, [int(len(idx) * spliting_ratio)])]
elif not validation_molecular_database:
raise NotImplementedError("please specify validation_molecular_database or set it to 'sample_from_molecular_database'")
molDB2DPdata(rundir, molecular_database, validation_molecular_database, self.json["model"]["type_map"], property_to_learn, xyz_derivative_property_to_learn)
if self.hyperparameters['early_stopping'].value:
earlyStop = earlyStopCls(
patience=self.hyperparameters['early_stopping_paticence'].value, threshold=self.hyperparameters['early_stopping_threshold'].value
)
if self.verbose:
print(f' > {DeePMDbin} train {self.model_file}.json')
sys.stdout.flush()
if os.path.exists(self.json['training']['disp_file']):
os.remove(self.json['training']['disp_file'])
proc = subprocess.Popen([DeePMDbin, "train", f"{self.model_file}.json"], stdout=subprocess.PIPE ,stderr=stderr, universal_newlines=True)
for line in proc.stdout:
if self.verbose:
print(line.replace('\n',''))
if self.hyperparameters['early_stopping'].value:
try:
lastline=subprocess.check_output(['tail', '-1', self.json['training']['disp_file']], stderr=FNULL).split()
loss = float(lastline[1])
n_batch = float(lastline[0])
if earlyStop.current(loss, n_batch, shutup=self.shutup):
print('met early-stopping conditions')
sys.stdout.flush()
proc.terminate()
except:
pass
sys.stdout.flush()
if not dirname:
tmpdir.cleanup()
if self.verbose:
print(' > %s freeze -o %s' % (DeePMDbin, self.model_file))
sys.stdout.flush()
subprocess.call([DeePMDbin, "freeze", "-o", self.model_file], stdout=stdout,stderr=stderr)
if os.path.exists(self.model_file) and self.verbose:
print('model saved in %s' % self.model_file)
FNULL.close()
[文档]
@doc_inherit
def predict(
self,
molecular_database: data.molecular_database = None,
molecule: data.molecule = None,
calculate_energy: bool = False,
calculate_energy_gradients: bool = False,
calculate_hessian: bool = False,
property_to_predict: Union[str, None] = 'estimated_y',
xyz_derivative_property_to_predict: Union[str, None] = None,
hessian_to_predict: Union[str, None] = None,
) -> None:
molDB, property_to_predict, xyz_derivative_property_to_predict, hessian_to_predict = \
super().predict(molecular_database=molecular_database, molecule=molecule, calculate_energy=calculate_energy, calculate_energy_gradients=calculate_energy_gradients, calculate_hessian=calculate_hessian, property_to_predict = property_to_predict, xyz_derivative_property_to_predict = xyz_derivative_property_to_predict, hessian_to_predict = hessian_to_predict)
with tempfile.TemporaryDirectory() as tmpdirname:
np.save(f'{tmpdirname}/coord.npy', molDB.xyz_coordinates.reshape(len(molDB), -1))
if not self.type_map:
self.type_map = list(sorted(set(molDB[0].get_element_symbols()), key=lambda x: list(molDB[0].get_element_symbols()).index(x)))
atom_type = [self.type_map.index(element) for element in molDB[0].get_element_symbols()]
code = f'''import deepmd.DeepPot as DP
import numpy as np
import sys
coord = np.load('{tmpdirname}/coord.npy')
cell = np.repeat(100 * np.eye(3).reshape([1, -1]), len(coord), axis=0)
atom_type = {atom_type}
dp = DP('{self.model_file}')
e, f, v = dp.eval(coord, cell, atom_type)
np.save('{tmpdirname}/energy.npy', e)
np.save('{tmpdirname}/force.npy', f)
'''
FNULL = open(os.devnull, 'w')
subprocess.call([DeePMDdir+"/python", "-c", code],stdout=FNULL , stderr=FNULL)
FNULL.close()
if property_to_predict:
enest = np.load(f'{tmpdirname}/energy.npy').flatten()
molDB.add_scalar_properties(enest, property_to_predict)
if xyz_derivative_property_to_predict:
gradest = np.load(f'{tmpdirname}/force.npy') * -1
molDB.add_xyz_vectorial_properties(gradest, xyz_derivative_property_to_predict)
class earlyStopCls():
def __init__(self, patience = 60, threshold = 0.0001 ):
self.bestloss = np.inf
self.patience = patience
self.threshold = threshold
self.counter = 0
self.last_batch =0
def current(self, loss, n_batch, verbose=1):
if n_batch == self.last_batch:
return False
else:
self.last_batch = n_batch
if loss < (1 - self.threshold)*self.bestloss:
self.bestloss = loss
self.counter = 0
else:
self.counter += 1
if verbose:
print('bestloss: %s early-stopping counter: %s/%s'% (self.bestloss, self.counter, self.patience))
sys.stdout.flush()
return self.counter > self.patience
def printHelp():
helpText = __doc__.replace('.. code-block::\n\n', '') + '''
To use Interface_DeePMDkit, please define environmental variable $DeePMDkit
to where dp binary is located (e.g "/home/xxx/deepmd-kit-1.2/bin").
Arguments with their default values:
MLprog=DeePMD-kit enables this interface
MLmodelType=S requests model S
DeepPot-SE [defaut]
DPMD
deepmd.stop_batch=4000000
number of batches to be trained before stopping
deepmd.batch_size=32
size of each batch
deepmd.start_lr=0.001
initial learning rate
deepmd.decay_steps=4000
number of batches for one decay of learning rate
deepmd.decay_rate=0.95
decay rate of each decay of learning rate
deepmd.rcut=6.0
cutoff radius for local environment
deepmd.start_pref_e/limit_pref_e/start_pref_f/limit_pref_f
parameters for loss prefactors
deepmd.sel=16 or deepmd.sel=a,b,c
number of selected atoms for each specie
deepmd.neuron=30,60
NN structure of descriptor
deepmd.n_neuron=80,80,80
NN structure of fitting network
deepmd.input=S file S with DeePMD input parameters
in json format
Cite DeePMD-kit:
H. Wang, L. Zhang, J. Han, W. E, Comput. Phys. Commun. 2018, 228, 178
Cite DeepPot-SE method, if you use it:
L.F. Zhang, J.Q. Han, H. Wang, W.A. Saidi, R. Car, W.N. E,
Adv. Neural. Inf. Process. Syst. 2018, 31, 4436
Cite DPMD method, if you use it:
L. Hang, J. Han, H. Wang, R. Car, W. E, Phys. Rev. Lett. 2018, 120, 143001
'''
print(helpText)