.. include:: api_overview.rst

.. _data:

Data
====

.. automodule:: mlatom.data
    :members:
    :exclude-members: spectra_similarity

.. _api_models:

Models
======

.. automodule:: mlatom.models
    :members: model

.. _methods:

methods
++++++++

.. automodule:: mlatom.models
    :members: methods

AIQM1
-----

.. automodule:: mlatom.aiqm1
    :members:

.. _ml_model:

ml_model
++++++++

.. automodule:: mlatom.models
    :members: ml_model, hyperparameters, hyperparameter, kreg, ani, dpmd, gap, physnet, sgdml, mace

.. _model_tree_node:

model_tree_node
+++++++++++++++

.. automodule:: mlatom.models
    :members: model_tree_node

Interfaces
++++++++++

.. automodule:: mlatom.interfaces
    :members:

TorchANI
--------

.. automodule:: mlatom.interfaces.torchani_interface
    :members:

DeepMD-kit
----------

.. automodule:: mlatom.interfaces.dpmd_interface
    :members:

GAP/QUIP
--------

.. automodule:: mlatom.interfaces.gap_interface
    :members:

PhysNet
-------

.. automodule:: mlatom.interfaces.physnet_interface
    :members:

MACE
----

.. automodule:: mlatom.interfaces.mace_interface
    :members:

sGDML
-----

.. automodule:: mlatom.interfaces.sgdml_interface
    :members:

Gaussian
--------

.. automodule:: mlatom.interfaces.gaussian_interface
    :members:

Orca
----

.. automodule:: mlatom.interfaces.orca_interface
    :members:

DFT-D4
------

.. automodule:: mlatom.interfaces.dftd4_interface
    :members:

PySCF
-----

.. automodule:: mlatom.interfaces.pyscf_interface
    :members:

Sparrow
-------

.. automodule:: mlatom.interfaces.sparrow_interface
    :members:

.. ORCA
.. ----

.. .. automodule:: mlatom.interfaces.orca
..     :members:

xTB
---

.. automodule:: mlatom.interfaces.xtb_interface
    :members:

MNDO
----

.. automodule:: mlatom.interfaces.mndo_interface
    :members:

Simulations
===========

.. automodule:: mlatom.simulations
    :members:

.. _api_initial_conditions:

Initial conditions
++++++++++++++++++

.. automodule:: mlatom.initial_conditions
    :members:

.. _MD:

Molecular dynamics
++++++++++++++++++

.. automodule:: mlatom.md
    :members:

.. _api_namd:

Surface-hopping dynamics
++++++++++++++++++++++++

.. automodule:: mlatom.namd
    :members:

Spectra
=======

.. automodule:: mlatom.spectra
    :members:

.. _api_al:

Active learning
===============

.. _api_al_initdatasampling:

Initial data sampling
+++++++++++++++++++++

``initdata_sampler`` can be:

- ``'wigner'``
- ``'harmonic-quantum-boltzmann'``
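Either of these values can be handed over to the active-learning routine directly. Below is a minimal sketch in the same ellipsis style as the ``ml.al`` calls further down this page; all other required arguments are omitted, and ``'wigner'`` is only the choice used for illustration:

.. code-block:: python

    import mlatom as ml

    ml.al(
        ...
        # sample the initial training data from a Wigner distribution;
        # 'harmonic-quantum-boltzmann' is the other supported choice
        initdata_sampler='wigner',
        ...
    )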
.. _api_al_mlmodel:

User-defined ML models
++++++++++++++++++++++

The user has the flexibility to create their own ML model class for AL. The minimum requirements for such a class are listed below (a minimal sketch satisfying them follows the list):

- it must have the usual ``train`` and ``predict`` functions;
- the ``train`` function must accept the ``molecular_database`` parameter;
- the ``predict`` function must accept the ``molecule`` and/or ``molecular_database`` parameters.
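Before looking at the full example, here is a minimal sketch of a class that satisfies these requirements. It wraps a single ``ml.models.ani`` model and deliberately omits the uncertainty quantification and ``al_info`` bookkeeping of the realistic example below; the class name and the default ``model_file`` are arbitrary placeholders:

.. code-block:: python

    import mlatom as ml

    class minimal_model():
        def __init__(self, model_file='mlmodel.pt'):
            # a single ANI model; the realistic example below uses a main and an auxiliary model
            self.model = ml.models.ani(model_file=model_file)

        def train(self, molecular_database=None):
            # train on energies and energy gradients of the labeled database
            self.model.train(molecular_database=molecular_database,
                             property_to_learn='energy',
                             xyz_derivative_property_to_learn='energy_gradients')

        def predict(self, molecule=None, molecular_database=None):
            # predict energies and energy gradients for a single molecule and/or a database
            self.model.predict(molecule=molecule,
                               molecular_database=molecular_database,
                               property_to_predict='energy',
                               xyz_derivative_property_to_predict='energy_gradients')

Such a bare-bones class does not assign the ``uq`` and ``uncertain`` labels that the sampler below relies on, so in practice the uncertainty-quantification logic from the realistic example is needed as well.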
A realistic, fully fledged example of how to create a usable ML model class is shown below (it is what we use in the AL routine!):

.. code-block:: python

    # np and stats below refer to NumPy and MLatom's statistics module
    # (e.g., import numpy as np; from mlatom import stats)
    class my_model():
        def __init__(self, al_info={}, model_file=None, device=None, verbose=False):
            import torch
            if device is None:
                device = 'cuda' if torch.cuda.is_available() else 'cpu'
            if model_file is None:
                if 'mlmodel_file' in al_info.keys():
                    self.model_file = al_info['mlmodel_file']
                else:
                    self.model_file = 'mlmodel'
                    al_info['mlmodel_file'] = self.model_file
            else:
                self.model_file = model_file
                al_info['mlmodel_file'] = self.model_file
            if 'main_mlmodel_file' in al_info.keys():
                main_mlmodel_file = al_info['main_mlmodel_file']
            else:
                main_mlmodel_file = f'{self.model_file}.pt'
                al_info['main_mlmodel_file'] = main_mlmodel_file
            if 'aux_mlmodel_file' in al_info.keys():
                aux_mlmodel_file = al_info['aux_mlmodel_file']
            else:
                aux_mlmodel_file = f'aux_{self.model_file}.pt'
                al_info['aux_mlmodel_file'] = aux_mlmodel_file
            self.device = device
            self.verbose = verbose
            self.main_model = ml.models.ani(model_file=main_mlmodel_file, device=device, verbose=verbose)
            self.aux_model = ml.models.ani(model_file=aux_mlmodel_file, device=device, verbose=verbose)

        def train(self, molecular_database=None, al_info={}):
            # store the models in the working directory of the current AL iteration, if provided
            if 'working_directory' in al_info.keys():
                workdir = al_info['working_directory']
                self.main_model.model_file = f'{workdir}/{self.model_file}.pt'
                self.aux_model.model_file = f'{workdir}/aux_{self.model_file}.pt'
            validation_set_fraction = 0.1
            [subtraindb, valdb] = molecular_database.split(number_of_splits=2, fraction_of_points_in_splits=[1-validation_set_fraction, validation_set_fraction], sampling='random')

            # train the main model on energies and gradients
            self.main_model = ml.models.ani(model_file=self.main_model.model_file, device=self.device, verbose=self.verbose)
            self.main_model.train(molecular_database=subtraindb, validation_molecular_database=valdb, property_to_learn='energy', xyz_derivative_property_to_learn='energy_gradients')

            # train the auxiliary model only on energies
            self.aux_model = ml.models.ani(model_file=self.aux_model.model_file, device=self.device, verbose=self.verbose)
            self.aux_model.train(molecular_database=subtraindb, validation_molecular_database=valdb, property_to_learn='energy')

            # set the uncertainty quantification threshold from the validation set
            if 'uq_threshold' not in al_info.keys():
                self.predict(molecular_database=valdb)
                uqs = valdb.get_property('uq')
                al_info['uq_threshold'] = np.median(uqs) + 3*stats.calc_median_absolute_deviation(uqs)
            self.uq_threshold = al_info['uq_threshold']

            # if the models were trained successfully, update al_info so that other routines can find them
            al_info['main_mlmodel_file'] = self.main_model.model_file
            al_info['aux_mlmodel_file'] = self.aux_model.model_file

        def predict(self, molecule=None, molecular_database=None):
            # predict energies and gradients with the main model
            self.main_model.predict(molecule=molecule, molecular_database=molecular_database, property_to_predict='energy', xyz_derivative_property_to_predict='energy_gradients')
            # predict energies with the auxiliary model
            self.aux_model.predict(molecule=molecule, molecular_database=molecular_database, property_to_predict='aux_energy')

            # calculate uncertainties as the deviation between the main and auxiliary energies
            moldb = molecular_database
            if moldb is None:
                # wrap the single molecule into a database so the loop below also covers it
                moldb = ml.data.molecular_database()
                moldb.molecules.append(molecule)
            for mol in moldb:
                mol.uq = abs(mol.energy - mol.aux_energy)
                # the threshold is defined only after training (or taken from al_info),
                # so it may not exist yet when predict() is first called on the validation set
                if hasattr(self, 'uq_threshold'):
                    if mol.uq > self.uq_threshold:
                        mol.uncertain = True
                    else:
                        mol.uncertain = False

        # These are useful in some internal AL routines, e.g., when we want to make predictions in parallel
        # (if nthreads is not set properly, it may slow down AL significantly!)
        @property
        def nthreads(self):
            return self.main_model.nthreads

        @nthreads.setter
        def nthreads(self, value):
            self.main_model.nthreads = value
            self.aux_model.nthreads = value

    ml.al(
        ...
        ml_model=my_model,
        # do not use my_model(...); if you want to pass any arguments, use ml_model_kwargs:
        ml_model_kwargs={...},
        # 'al_info' does not need to be included here, it will be added automatically.
        # If you do supply an 'al_info' key, it will overwrite the default one, so only do this if you know what you are doing.
        ...
    )

As you can see, it is helpful (but not required) if the ``__init__`` and ``train`` functions of the ML model class also accept the ``al_info`` parameter, which can be used to pass information from one routine to another during active learning.

.. _api_al_sampler:

Sampler
+++++++

Here is a realistic example of the sampler function used in physics-informed active learning:

.. code-block:: python

    def my_sampler(al_info={}, ml_model=None,
                   initcond_sampler=None, initcond_sampler_kwargs={},
                   maximum_propagation_time=1000, time_step=0.1,
                   ensemble='NVE', thermostat=None,
                   dump_trajs=False, dump_trajectory_interval=None,
                   stop_function=None, batch_parallelization=True):
        moldb2label = ml.data.molecular_database()

        # generate initial conditions
        if type(initcond_sampler) == str:
            if initcond_sampler.casefold() in ['wigner', 'harmonic-quantum-boltzmann']:
                initcond_sampler_kwargs['generation_method'] = initcond_sampler
                initcond_sampler = ml.generate_initial_conditions
        import inspect
        args = inspect.getfullargspec(initcond_sampler).args  # getargspec was removed in Python 3.11
        if 'al_info' in args:
            initial_molecular_database = initcond_sampler(al_info=al_info, **initcond_sampler_kwargs)
        else:
            initial_molecular_database = initcond_sampler(**initcond_sampler_kwargs)

        # run MD in parallel to collect uncertain points
        if batch_parallelization:
            # faster way to propagate many trajectories with ML
            dyn = ml.md_parallel(model=ml_model,
                                 molecular_database=initial_molecular_database,
                                 ensemble=ensemble,
                                 thermostat=thermostat,
                                 time_step=time_step,
                                 maximum_propagation_time=maximum_propagation_time,
                                 dump_trajectory_interval=dump_trajectory_interval,
                                 stop_function=stop_function)
            trajs = dyn.molecular_trajectory
            for itraj in range(len(trajs.steps[0])):
                print(f"Trajectory {itraj} number of steps: {trajs.traj_len[itraj]}")
                # the last step of an uncertain trajectory is collected for labeling
                if trajs.steps[trajs.traj_len[itraj]][itraj].uncertain:
                    print(f'Adding molecule from trajectory {itraj} at time {trajs.traj_len[itraj]*time_step} fs')
                    moldb2label.molecules.append(trajs.steps[trajs.traj_len[itraj]][itraj])
                # dump the trajectory
                if dump_trajs:
                    import os
                    traj = ml.data.molecular_trajectory()
                    for istep in range(trajs.traj_len[itraj]+1):
                        step = ml.data.molecular_trajectory_step()
                        step.step = istep
                        step.time = istep * time_step
                        step.molecule = trajs.steps[istep][itraj]
                        traj.steps.append(step)
                    if 'working_directory' in al_info.keys():
                        dirname = f"{al_info['working_directory']}/trajs"
                    else:
                        dirname = 'trajs'
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    traj.dump(f"{dirname}/traj{itraj}.h5", format='h5md')
        else:
            md_kwargs = {
                'molecular_database': initial_molecular_database,
                'model': ml_model,
                'time_step': time_step,
                'maximum_propagation_time': maximum_propagation_time,
                'ensemble': ensemble,
                'thermostat': thermostat,
                'dump_trajectory_interval': dump_trajectory_interval,
                'stop_function': stop_function
            }
            dyns = ml.simulations.run_in_parallel(molecular_database=initial_molecular_database,
                                                  task=ml.md,
                                                  task_kwargs=md_kwargs,
                                                  create_and_keep_temp_directories=False)
            trajs = [d.molecular_trajectory for d in dyns]
            itraj = 0
            for traj in trajs:
                itraj += 1
                print(f"Trajectory {itraj} number of steps: {len(traj.steps)}")
                if traj.steps[-1].molecule.uncertain:
                    print(f'Adding molecule from trajectory {itraj} at time {traj.steps[-1].time:.2f} fs')
                    moldb2label.molecules.append(traj.steps[-1].molecule)
                # dump the trajectory
                if dump_trajs:
                    import os
                    if 'working_directory' in al_info.keys():
                        dirname = f"{al_info['working_directory']}/trajs"
                    else:
                        dirname = 'trajs'
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    traj.dump(f"{dirname}/traj{itraj}.h5", format='h5md')

        # add the source of the molecules
        for mol in moldb2label:
            mol.sampling = 'md'
        return moldb2label

    ml.al(
        ...
        sampler=my_sampler,
        sampler_kwargs={'time_step': 0.5},
        ...
    )

Analysis
========

.. automodule:: mlatom.xyz
    :members: