'''
.. code-block::
!---------------------------------------------------------------------------!
! Interface_DeePMDkit: Interface between DeePMD-kit and MLatom              !
! Implementations by: Fuchun Ge                                             !
!---------------------------------------------------------------------------!
'''
from __future__ import annotations
from typing import Any, Union, Dict, Callable
import os, sys, uuid, time, tempfile, subprocess, json
import numpy as np
from .. import constants
from .. import data
from .. import models
from .. import stopper
from ..decorators import doc_inherit
if 'DeePMDkit' in os.environ:
DeePMDdir = os.environ['DeePMDkit']
DeePMDbin = DeePMDdir+'/dp'
else:
print('please set the environment variable $DeePMDkit to the DeePMD-kit installation directory')
DeePMDdir = ''
DeePMDbin = ''
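# Convert MLatom molecular databases into the DeePMD-kit data layout:
# type.raw maps each atom to an index in type_map, set.000 holds the training data
# and set.001 the validation data (box.npy, coord.npy, energy.npy, force.npy).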
def molDB2DPdata(dir_name, molecular_database, validation_molecular_database, type_map=None,
property_to_learn=None,
xyz_derivative_property_to_learn=None):
if os.path.exists(dir_name):
if not os.path.isdir(dir_name):
print(f'file {dir_name} already exists')
return
else:
os.mkdir(dir_name)
if os.path.exists(f'{dir_name}/set.000'):
print(f'{dir_name}/set.000 already exists')
return
else:
os.mkdir(f'{dir_name}/set.000')
if os.path.exists(f'{dir_name}/set.001'):
print(f'{dir_name}/set.001 already exists')
return
else:
os.mkdir(f'{dir_name}/set.001')
if type_map is None:
# type_map = list(np.sort(np.unique(molDB.get_element_symbols())))
type_map = list(sorted(set(molecular_database[0].get_element_symbols()), key=lambda x: list(molecular_database[0].get_element_symbols()).index(x)))
with open(f'{dir_name}/type.raw', 'w') as f:
for element in molecular_database[0].get_element_symbols():
f.write("%d " % type_map.index(element))
np.save(f'{dir_name}/set.000/box.npy', np.repeat(100 * np.eye(3).reshape([1, -1]), len(molecular_database), axis=0))
np.save(f'{dir_name}/set.001/box.npy', np.repeat(100 * np.eye(3).reshape([1, -1]), len(validation_molecular_database), axis=0))
np.save(f'{dir_name}/set.000/coord.npy', molecular_database.xyz_coordinates.reshape(len(molecular_database), -1))
np.save(f'{dir_name}/set.001/coord.npy', validation_molecular_database.xyz_coordinates.reshape(len(validation_molecular_database), -1))
if property_to_learn:
np.save(f'{dir_name}/set.000/energy.npy', molecular_database.get_properties(property_to_learn))
np.save(f'{dir_name}/set.001/energy.npy', validation_molecular_database.get_properties(property_to_learn))
if xyz_derivative_property_to_learn:
np.save(f'{dir_name}/set.000/force.npy', -1 * molecular_database.get_xyz_vectorial_properties(xyz_derivative_property_to_learn).reshape(len(molecular_database), -1))
np.save(f'{dir_name}/set.001/force.npy', -1 * validation_molecular_database.get_xyz_vectorial_properties(xyz_derivative_property_to_learn).reshape(len(validation_molecular_database), -1))
class dpmd(models.ml_model, models.tensorflow_model):
'''
Create a `DeepPot-SE <https://proceedings.neurips.cc/paper_files/paper/2018/hash/e2ad76f2326fbc6b56a45a56c59fafdb-Abstract.html>`_ model object.
Interfaces to `DeepMD-kit <https://doi.org/10.1016/j.cpc.2018.03.016>`_.
Arguments:
model_file (str, optional): The file name with which the model is saved or from which it is loaded.
hyperparameters (Dict[str, Any] | :class:`mlatom.models.hyperparameters`, optional): Updates the hyperparameters of the model with the provided ones.
verbose (int, optional): 0 for silence, 1 for verbosity.
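
A minimal training/prediction sketch (the file name is hypothetical, and ``molDB`` is assumed to be an existing :class:`mlatom.data.molecular_database` holding reference energies and energy gradients; $DeePMDkit must be set):

.. code-block:: python

    model = dpmd(model_file='dpmd_model.pb', verbose=1)
    model.train(molecular_database=molDB,
                property_to_learn='energy',
                xyz_derivative_property_to_learn='energy_gradients')
    model.predict(molecular_database=molDB,
                  calculate_energy=True,
                  calculate_energy_gradients=True)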
'''
hyperparameters = models.hyperparameters({
'early_stopping': models.hyperparameter(value=True, choices=[True, False], optimization_space='choices', dtype=bool),
'early_stopping_paticence': models.hyperparameter(value=60, minval=16, maxval=256, optimization_space='linear', dtype=int),
'early_stopping_threshold': models.hyperparameter(value=0.0000, minval=-0.0001, maxval=0.0001, optimization_space='linear', dtype=float),
'batch_size': models.hyperparameter(value=4, minval=1, maxval=1024, optimization_space='linear', dtype=int),
'disp_freq': models.hyperparameter(value=100, minval=10, maxval=10000, optimization_space='linear', dtype=int),
'stop_batch': models.hyperparameter(value=400000, minval=100, maxval=1000000, optimization_space='linear', dtype=int),
"start_lr": models.hyperparameter(value=0.005, minval=0.0001, maxval=0.01, optimization_space='log', dtype=float),
"decay_steps": models.hyperparameter(value=2000, minval=100, maxval=10000, optimization_space='log', dtype=int),
"decay_rate": models.hyperparameter(value=0.95, minval=0.5, maxval=1, optimization_space='log', dtype=float),
"start_pref_e": models.hyperparameter(value=0.02, minval=0, maxval=100, optimization_space='log', dtype=float),
"limit_pref_e": models.hyperparameter(value=2, minval=0, maxval=1000, optimization_space='log', dtype=float),
"start_pref_f": models.hyperparameter(value=1000, minval=0, maxval=1000, optimization_space='log', dtype=float),
"limit_pref_f": models.hyperparameter(value=1, minval=0, maxval=1000, optimization_space='log', dtype=float),
"start_pref_v": models.hyperparameter(value=0, minval=0, maxval=1000, optimization_space='log', dtype=float),
"limit_pref_v": models.hyperparameter(value=0, minval=0, maxval=1000, optimization_space='log', dtype=float),
'rcut': models.hyperparameter(value=6.0, minval=1.0, maxval=10.0, optimization_space='linear', dtype=float),
'neuron': models.hyperparameter('30,60'),
'n_neuron': models.hyperparameter('80,80,80'),
'sel': models.hyperparameter(value='16', minval=1, maxval=32, optimization_space='linear', dtype=str),
})
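# Default `dp train` input template (JSON); the fields below are overwritten by
# update_json() from the hyperparameters above before training starts.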
json = {
"model": {
"descriptor": {
"type": "se_a",
"sel": [
16,
16,
16
],
"rcut_smth": 6.3,
"rcut": 6.5,
"neuron": [
30,
60
],
"resnet_dt": False,
"axis_neuron": 6,
"seed": 42
},
"fitting_net": {
"n_neuron": [
80,
80,
80
],
"resnet_dt": True,
"seed": 42
},
"type_map": [
"C",
"H",
"O"
]
},
"learning_rate": {
"type": "exp",
"start_lr": 0.005,
"decay_steps": 2000,
"decay_rate": 0.95
},
"loss": {
"start_pref_e": 0.02,
"limit_pref_e": 2,
"start_pref_f": 1000,
"limit_pref_f": 1,
"start_pref_v": 0,
"limit_pref_v": 0
},
"training": {
"systems": [
"./"
],
"set_prefix": "set",
"stop_batch": 400000,
"batch_size": 4,
"seed": 42,
"disp_file": "lcurve.out",
"disp_freq": 100,
"numb_test": 10,
"save_freq": 100,
"save_ckpt": "model.ckpt",
"load_ckpt": "model.ckpt",
"disp_training": True,
"time_training": True,
"profiling": False,
"profiling_file": "timeline.json"
}
}
type_map = []
json_input = None
property_name = 'y'
program = 'DeepMD-kit'
meta_data = {
"genre": "neural network"
}
model_file = None
model = None
verbose = False
def __init__(self, model_file=None, hyperparameters={}, verbose=1,):
self.hyperparameters = self.hyperparameters.copy()
self.hyperparameters.update(hyperparameters)
self.verbose = verbose
if model_file:
self.model_file = model_file
if os.path.exists(self.model_file):
if os.path.exists(f"{self.model_file}.json"):
with open(f"{self.model_file}.json") as f:
self.json = json.load(f)
self.type_map = self.json["model"]["type_map"]
else:
self.model_file = f'DPMD_{str(uuid.uuid4())}.pb'
def parse_args(self, args):
super().parse_args(args)
for hyperparam in self.hyperparameters:
if hyperparam in args.hyperparameter_optimization['hyperparameters']:
self.parse_hyperparameter_optimization(args, hyperparam)
# elif hyperparam in args.data:
# self.hyperparameters[hyperparam].value = args.data[hyperparam]
elif 'deepmd' in args.data and hyperparam in args.deepmd.data:
self.hyperparameters[hyperparam].value = args.deepmd.data[hyperparam]
if 'deepmd' in args.data:
if 'input' in args.deepmd.data:
self.update_json(json_input=args.deepmd.input)
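# update_json() either merges a user-supplied DeePMD-kit input file (json_input) into the
# template above, or fills the template from the current hyperparameter values, and then
# writes the result to {model_file}.json for `dp train` to consume.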
def update_json(self, **kwargs):
if 'json_input' in kwargs and kwargs['json_input']:
with open(kwargs['json_input'], 'r') as f:
self.json.update(json.load(f))
self.json['input'] = kwargs['json_input']
else:
self.json["model"]["type_map"] = self.type_map
if len(self.json['model']['descriptor']['sel']) != len(self.type_map):
self.json['model']['descriptor']['sel'] = [self.json['model']['descriptor']['sel'][0]] * len(self.type_map)
self.json['training']['batch_size'] = kwargs['batch_size'].value
self.json['training']['stop_batch'] = kwargs['stop_batch'].value
self.json['training']['disp_freq'] = kwargs['disp_freq'].value
self.json['training']['save_freq'] = kwargs['disp_freq'].value
self.json['learning_rate']["start_lr"] = kwargs["start_lr"].value
self.json['learning_rate']["decay_steps"] = kwargs["decay_steps"].value
self.json['learning_rate']["decay_rate"] = kwargs["decay_rate"].value
self.json['loss']["start_pref_e"] = kwargs["start_pref_e"].value
self.json['loss']["limit_pref_e"] = kwargs["limit_pref_e"].value
self.json['loss']["start_pref_f"] = kwargs["start_pref_f"].value
self.json['loss']["limit_pref_f"] = kwargs["limit_pref_f"].value
self.json['loss']["start_pref_v"] = kwargs["start_pref_v"].value
self.json['loss']["limit_pref_v"] = kwargs["limit_pref_v"].value
if 'property_to_learn' in kwargs and not kwargs['property_to_learn']:
self.json['loss']['limit_pref_e'] = 0
self.json['loss']['start_pref_e'] = 0
if 'xyz_derivative_property_to_learn' in kwargs and not kwargs['xyz_derivative_property_to_learn']:
self.json['loss']['limit_pref_f'] = 0
self.json['loss']['start_pref_f'] = 0
self.json['model']["descriptor"]['sel'] = [int(i) for i in kwargs["sel"].value.strip('[]').split(',')]
if len(self.json['model']["descriptor"]['sel']) == 1:
self.json['model']["descriptor"]['sel'] = self.json['model']["descriptor"]['sel'] * len(self.json["model"]["type_map"])
self.json['model']["descriptor"]['rcut'] = kwargs["rcut"].value
self.json['model']["descriptor"]['neuron'] = [int(i) for i in kwargs["neuron"].value.strip('[]').split(',')]
self.json['model']["fitting_net"]['n_neuron'] = [int(i) for i in kwargs["n_neuron"].value.strip('[]').split(',')]
self.type_map = self.json["model"]["type_map"]
self.json['training']['set_prefix'] = 'set'
if 'dirname' in kwargs:
self.json['training']['systems'] = kwargs['dirname']
with open(f"{self.model_file}.json", "w") as f:
json.dump(self.json, f, indent=4)
def train(
self,
molecular_database: data.molecular_database,
property_to_learn: str = 'energy',
xyz_derivative_property_to_learn: str = None,
validation_molecular_database: Union[data.molecular_database, str, None] = 'sample_from_molecular_database',
hyperparameters: Union[Dict[str,Any], models.hyperparameters] = {},
spliting_ratio=0.8,
stdout=None,
stderr=None,
json_input=None,
dirname=None,
):
self.hyperparameters.update(hyperparameters)
FNULL = open(os.devnull, 'w')
if not self.verbose:
if not stdout:
stdout = FNULL
if not stderr:
stderr = FNULL
else:
stdout = sys.stdout
stderr = sys.stderr
if os.path.exists(self.model_file):
print(f'{self.model_file} exists')
self.model_file = f'DPMD_{str(uuid.uuid4())}.pb'
if self.verbose:
print(f'Trained DPMD model will be saved in {self.model_file}.')
if dirname:
rundir = dirname
else:
tmpdir = tempfile.TemporaryDirectory()
rundir = tmpdir.name
if not self.type_map:
self.type_map = list(sorted(set(molecular_database[0].get_element_symbols()), key=lambda x: list(molecular_database[0].get_element_symbols()).index(x)))
self.update_json(**self.hyperparameters,
json_input=json_input if json_input else self.json_input,
dirname=rundir,
property_to_learn=property_to_learn,
xyz_derivative_property_to_learn=xyz_derivative_property_to_learn)
if validation_molecular_database == 'sample_from_molecular_database':
idx = np.arange(len(molecular_database))
np.random.shuffle(idx)
molecular_database, validation_molecular_database = [molecular_database[i_split] for i_split in np.split(idx, [int(len(idx) * spliting_ratio)])]
elif not validation_molecular_database:
raise NotImplementedError("please specify validation_molecular_database or set it to 'sample_from_molecular_database'")
molDB2DPdata(rundir, molecular_database, validation_molecular_database, self.json["model"]["type_map"], property_to_learn, xyz_derivative_property_to_learn)
if self.hyperparameters['early_stopping'].value:
earlyStop = earlyStopCls(
patience=self.hyperparameters['early_stopping_paticence'].value, threshold=self.hyperparameters['early_stopping_threshold'].value
)
if self.verbose:
print(f' > {DeePMDbin} train {self.model_file}.json')
sys.stdout.flush()
if os.path.exists(self.json['training']['disp_file']):
os.remove(self.json['training']['disp_file'])
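# Run `dp train` and stream its output; when early stopping is enabled, the current loss
# is read from the last line of the learning-curve file (disp_file, lcurve.out by default)
# after each output line and training is terminated once the stopping criterion is met.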
proc = subprocess.Popen([DeePMDbin, "train", f"{self.model_file}.json"], stdout=subprocess.PIPE, stderr=stderr, universal_newlines=True)
for line in proc.stdout:
if self.verbose:
print(line.replace('\n',''))
if self.hyperparameters['early_stopping'].value:
try:
lastline = subprocess.check_output(['tail', '-1', self.json['training']['disp_file']], stderr=FNULL).split()
loss = float(lastline[1])
n_batch = float(lastline[0])
if earlyStop.current(loss, n_batch, verbose=self.verbose):
print('met early-stopping conditions')
sys.stdout.flush()
proc.terminate()
except Exception:
pass
sys.stdout.flush()
if not dirname:
tmpdir.cleanup()
if self.verbose:
print(' > %s freeze -o %s' % (DeePMDbin, self.model_file))
sys.stdout.flush()
subprocess.call([DeePMDbin, "freeze", "-o", self.model_file], stdout=stdout, stderr=stderr)
if os.path.exists(self.model_file) and self.verbose:
print('model saved in %s' % self.model_file)
FNULL.close()
@doc_inherit
def predict(
self,
molecular_database: data.molecular_database = None,
molecule: data.molecule = None,
calculate_energy: bool = False,
calculate_energy_gradients: bool = False,
calculate_hessian: bool = False,
property_to_predict: Union[str, None] = 'estimated_y',
xyz_derivative_property_to_predict: Union[str, None] = None,
hessian_to_predict: Union[str, None] = None,
) -> None:
molDB, property_to_predict, xyz_derivative_property_to_predict, hessian_to_predict = \
super().predict(molecular_database=molecular_database, molecule=molecule, calculate_energy=calculate_energy, calculate_energy_gradients=calculate_energy_gradients, calculate_hessian=calculate_hessian, property_to_predict = property_to_predict, xyz_derivative_property_to_predict = xyz_derivative_property_to_predict, hessian_to_predict = hessian_to_predict)
with tempfile.TemporaryDirectory() as tmpdirname:
np.save(f'{tmpdirname}/coord.npy', molDB.xyz_coordinates.reshape(len(molDB), -1))
if not self.type_map:
self.type_map = list(sorted(set(molDB[0].get_element_symbols()), key=lambda x: list(molDB[0].get_element_symbols()).index(x)))
atom_type = [self.type_map.index(element) for element in molDB[0].get_element_symbols()]
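# Prediction is delegated to the Python interpreter shipped with DeePMD-kit
# ($DeePMDkit/python): the generated script below loads the frozen model, evaluates
# energies and forces for all geometries, and stores them as .npy files in the
# temporary directory, from which they are read back afterwards.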
code = f'''import deepmd.DeepPot as DP
import numpy as np
import sys
coord = np.load('{tmpdirname}/coord.npy')
cell = np.repeat(100 * np.eye(3).reshape([1, -1]), len(coord), axis=0)
atom_type = {atom_type}
dp = DP('{self.model_file}')
e, f, v = dp.eval(coord, cell, atom_type)
np.save('{tmpdirname}/energy.npy', e)
np.save('{tmpdirname}/force.npy', f)
'''
FNULL = open(os.devnull, 'w')
subprocess.call([DeePMDdir + "/python", "-c", code], stdout=FNULL, stderr=FNULL)
FNULL.close()
if property_to_predict:
enest = np.load(f'{tmpdirname}/energy.npy').flatten()
molDB.add_scalar_properties(enest, property_to_predict)
if xyz_derivative_property_to_predict:
gradest = np.load(f'{tmpdirname}/force.npy') * -1
molDB.add_xyz_vectorial_properties(gradest, xyz_derivative_property_to_predict)
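# Simple early-stopping helper: training stops once the loss has failed to improve on the
# best value by a relative margin of `threshold` for more than `patience` consecutive
# learning-curve updates (repeated batch numbers are ignored).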
class earlyStopCls():
def __init__(self, patience = 60, threshold = 0.0001 ):
self.bestloss = np.inf
self.patience = patience
self.threshold = threshold
self.counter = 0
self.last_batch = 0
def current(self, loss, n_batch, verbose=1):
if n_batch == self.last_batch:
return False
else:
self.last_batch = n_batch
if loss < (1 - self.threshold)*self.bestloss:
self.bestloss = loss
self.counter = 0
else:
self.counter += 1
if verbose:
print('bestloss: %s early-stopping counter: %s/%s'% (self.bestloss, self.counter, self.patience))
sys.stdout.flush()
return self.counter > self.patience
def printHelp():
helpText = __doc__.replace('.. code-block::\n\n', '') + '''
To use Interface_DeePMDkit, please set the environment variable $DeePMDkit
to the directory where the dp binary is located (e.g. "/home/xxx/deepmd-kit-1.2/bin").
Arguments with their default values:
MLprog=DeePMD-kit enables this interface
MLmodelType=S requests model S
DeepPot-SE [default]
DPMD
deepmd.stop_batch=4000000
number of batches to be trained before stopping
deepmd.batch_size=32
size of each batch
deepmd.start_lr=0.001
initial learning rate
deepmd.decay_steps=4000
number of batches for one decay of learning rate
deepmd.decay_rate=0.95
decay rate of each decay of learning rate
deepmd.rcut=6.0
cutoff radius for local environment
deepmd.start_pref_e/limit_pref_e/start_pref_f/limit_pref_f
parameters for loss prefactors
deepmd.sel=16 or deepmd.sel=a,b,c
number of selected atoms for each species
deepmd.neuron=30,60
NN structure of descriptor
deepmd.n_neuron=80,80,80
NN structure of fitting network
deepmd.input=S file S with DeePMD input parameters
in json format
Cite DeePMD-kit:
H. Wang, L. Zhang, J. Han, W. E, Comput. Phys. Commun. 2018, 228, 178
Cite DeepPot-SE method, if you use it:
L.F. Zhang, J.Q. Han, H. Wang, W.A. Saidi, R. Car, W.N. E,
Adv. Neural. Inf. Process. Syst. 2018, 31, 4436
Cite DPMD method, if you use it:
L. Zhang, J. Han, H. Wang, R. Car, W. E, Phys. Rev. Lett. 2018, 120, 143001
'''
print(helpText)