.. include:: api_overview.rst

.. _data:

Data
====

.. automodule:: mlatom.data
    :members:
    :exclude-members: spectra_similarity

.. _api_models:

Models
======

.. automodule:: mlatom.models
    :members: model

.. _methods:

methods
++++++++

.. automodule:: mlatom.models
    :members: methods

AIQM1
-----

.. automodule:: mlatom.aiqm1
    :members:

.. _ml_model:

ml_model
++++++++

.. automodule:: mlatom.models
    :members: ml_model, hyperparameters, hyperparameter, kreg, ani, dpmd, gap, physnet, sgdml, mace

.. _model_tree_node:

model_tree_node
+++++++++++++++

.. automodule:: mlatom.models
    :members: model_tree_node

Interfaces
++++++++++

.. automodule:: mlatom.interfaces
    :members:

TorchANI
--------

.. automodule:: mlatom.interfaces.torchani_interface
    :members:

DeepMD-kit
----------

.. automodule:: mlatom.interfaces.dpmd_interface
    :members:

GAP/QUIP
--------

.. automodule:: mlatom.interfaces.gap_interface
    :members:

PhysNet
-------

.. automodule:: mlatom.interfaces.physnet_interface
    :members:

MACE
----

.. automodule:: mlatom.interfaces.mace_interface
    :members:

sGDML
-----

.. automodule:: mlatom.interfaces.sgdml_interface
    :members:

Gaussian
--------

.. automodule:: mlatom.interfaces.gaussian_interface
    :members:

Orca
----

.. automodule:: mlatom.interfaces.orca_interface
    :members:

DFT-D4
------

.. automodule:: mlatom.interfaces.dftd4_interface
    :members:

PySCF
-----

.. automodule:: mlatom.interfaces.pyscf_interface
    :members:

Sparrow
-------

.. automodule:: mlatom.interfaces.sparrow_interface
    :members:

.. ORCA
.. ----

.. .. automodule:: mlatom.interfaces.orca
..     :members:

xTB
---

.. automodule:: mlatom.interfaces.xtb_interface
    :members:

MNDO
----

.. automodule:: mlatom.interfaces.mndo_interface
    :members:

Simulations
===========

.. automodule:: mlatom.simulations
    :members:

.. _api_initial_conditions:

Initial conditions
++++++++++++++++++

.. automodule:: mlatom.initial_conditions
    :members:

.. _MD:

Molecular dynamics
++++++++++++++++++

.. automodule:: mlatom.md
    :members:

.. _api_namd:

Surface-hopping dynamics
++++++++++++++++++++++++

.. automodule:: mlatom.namd
    :members:

Spectra
=======

.. automodule:: mlatom.spectra
    :members:

.. _api_al:

Active learning
===============

.. _api_al_initdatasampling:

Initial data sampling
+++++++++++++++++++++

``initdata_sampler`` can be:

- ``'wigner'``
- ``'harmonic-quantum-boltzmann'``
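Either of these values can be handed over to the active-learning routine directly. Below is a minimal sketch in the same ellipsis style as the ``ml.al`` calls further down this page; all other required arguments are omitted, and ``'wigner'`` is only the choice used for illustration:

.. code-block:: python

    import mlatom as ml

    ml.al(
        ...
        # sample the initial training data from a Wigner distribution;
        # 'harmonic-quantum-boltzmann' is the other supported choice
        initdata_sampler='wigner',
        ...
    )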
.. _api_al_mlmodel:

User-defined ML models
++++++++++++++++++++++

The user has the flexibility to create their own ML model class for AL. The minimum requirements for such a class are listed below (a minimal sketch satisfying them follows the list):

- it must have the usual ``train`` and ``predict`` functions;
- the ``train`` function must accept the ``molecular_database`` parameter;
- the ``predict`` function must accept the ``molecule`` and/or ``molecular_database`` parameters.
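Before looking at the full example, here is a minimal sketch of a class that satisfies these requirements. It wraps a single ``ml.models.ani`` model and deliberately omits the uncertainty quantification and ``al_info`` bookkeeping of the realistic example below; the class name and the default ``model_file`` are arbitrary placeholders:

.. code-block:: python

    import mlatom as ml

    class minimal_model():
        def __init__(self, model_file='mlmodel.pt'):
            # a single ANI model; the realistic example below uses a main and an auxiliary model
            self.model = ml.models.ani(model_file=model_file)

        def train(self, molecular_database=None):
            # train on energies and energy gradients of the labeled database
            self.model.train(molecular_database=molecular_database,
                             property_to_learn='energy',
                             xyz_derivative_property_to_learn='energy_gradients')

        def predict(self, molecule=None, molecular_database=None):
            # predict energies and energy gradients for a single molecule and/or a database
            self.model.predict(molecule=molecule,
                               molecular_database=molecular_database,
                               property_to_predict='energy',
                               xyz_derivative_property_to_predict='energy_gradients')

Such a bare-bones class does not assign the ``uq`` and ``uncertain`` labels that the sampler below relies on, so in practice the uncertainty-quantification logic from the realistic example is needed as well.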
A realistic, fully fledged example of how to create a usable ML model class is shown below (it is what we use in the AL routine!):

.. code-block:: python

    # np and stats below refer to NumPy and MLatom's statistics module
    # (e.g., import numpy as np; from mlatom import stats)
    class my_model():
        def __init__(self, al_info={}, model_file=None, device=None, verbose=False):
            import torch
            if device is None:
                device = 'cuda' if torch.cuda.is_available() else 'cpu'
            if model_file is None:
                if 'mlmodel_file' in al_info.keys():
                    self.model_file = al_info['mlmodel_file']
                else:
                    self.model_file = 'mlmodel'
                    al_info['mlmodel_file'] = self.model_file
            else:
                self.model_file = model_file
                al_info['mlmodel_file'] = self.model_file
            if 'main_mlmodel_file' in al_info.keys():
                main_mlmodel_file = al_info['main_mlmodel_file']
            else:
                main_mlmodel_file = f'{self.model_file}.pt'
                al_info['main_mlmodel_file'] = main_mlmodel_file
            if 'aux_mlmodel_file' in al_info.keys():
                aux_mlmodel_file = al_info['aux_mlmodel_file']
            else:
                aux_mlmodel_file = f'aux_{self.model_file}.pt'
                al_info['aux_mlmodel_file'] = aux_mlmodel_file
            self.device = device
            self.verbose = verbose
            self.main_model = ml.models.ani(model_file=main_mlmodel_file, device=device, verbose=verbose)
            self.aux_model = ml.models.ani(model_file=aux_mlmodel_file, device=device, verbose=verbose)

        def train(self, molecular_database=None, al_info={}):
            # store the models in the working directory of the current AL iteration, if provided
            if 'working_directory' in al_info.keys():
                workdir = al_info['working_directory']
                self.main_model.model_file = f'{workdir}/{self.model_file}.pt'
                self.aux_model.model_file = f'{workdir}/aux_{self.model_file}.pt'
            validation_set_fraction = 0.1
            [subtraindb, valdb] = molecular_database.split(number_of_splits=2, fraction_of_points_in_splits=[1-validation_set_fraction, validation_set_fraction], sampling='random')

            # train the main model on energies and gradients
            self.main_model = ml.models.ani(model_file=self.main_model.model_file, device=self.device, verbose=self.verbose)
            self.main_model.train(molecular_database=subtraindb, validation_molecular_database=valdb, property_to_learn='energy', xyz_derivative_property_to_learn='energy_gradients')

            # train the auxiliary model only on energies
            self.aux_model = ml.models.ani(model_file=self.aux_model.model_file, device=self.device, verbose=self.verbose)
            self.aux_model.train(molecular_database=subtraindb, validation_molecular_database=valdb, property_to_learn='energy')

            # set the uncertainty quantification threshold from the validation set
            if 'uq_threshold' not in al_info.keys():
                self.predict(molecular_database=valdb)
                uqs = valdb.get_property('uq')
                al_info['uq_threshold'] = np.median(uqs) + 3*stats.calc_median_absolute_deviation(uqs)
            self.uq_threshold = al_info['uq_threshold']

            # if the models were trained successfully, update al_info so that other routines can find them
            al_info['main_mlmodel_file'] = self.main_model.model_file
            al_info['aux_mlmodel_file'] = self.aux_model.model_file

        def predict(self, molecule=None, molecular_database=None):
            # predict energies and gradients with the main model
            self.main_model.predict(molecule=molecule, molecular_database=molecular_database, property_to_predict='energy', xyz_derivative_property_to_predict='energy_gradients')
            # predict energies with the auxiliary model
            self.aux_model.predict(molecule=molecule, molecular_database=molecular_database, property_to_predict='aux_energy')

            # calculate uncertainties as the deviation between the main and auxiliary energies
            moldb = molecular_database
            if moldb is None:
                # wrap the single molecule into a database so the loop below also covers it
                moldb = ml.data.molecular_database()
                moldb.molecules.append(molecule)
            for mol in moldb:
                mol.uq = abs(mol.energy - mol.aux_energy)
                # the threshold is defined only after training (or taken from al_info),
                # so it may not exist yet when predict() is first called on the validation set
                if hasattr(self, 'uq_threshold'):
                    if mol.uq > self.uq_threshold:
                        mol.uncertain = True
                    else:
                        mol.uncertain = False

        # These are useful in some internal AL routines, e.g., when we want to make predictions in parallel
        # (if nthreads is not set properly, it may slow down AL significantly!)
        @property
        def nthreads(self):
            return self.main_model.nthreads

        @nthreads.setter
        def nthreads(self, value):
            self.main_model.nthreads = value
            self.aux_model.nthreads = value

    ml.al(
        ...
        ml_model=my_model,
        # do not use my_model(...); if you want to pass any arguments, use ml_model_kwargs:
        ml_model_kwargs={...},
        # 'al_info' does not need to be included here, it will be added automatically.
        # If you do supply an 'al_info' key, it will overwrite the default one, so only do this if you know what you are doing.
        ...
    )

As you can see, it is helpful (but not required) if the ``__init__`` and ``train`` functions of the ML model class also accept the ``al_info`` parameter, which can be used to pass information from one routine to another during active learning.

.. _api_al_sampler:

Sampler
+++++++

Here is a realistic example of the sampler function used in physics-informed active learning:

.. code-block:: python

    def my_sampler(al_info={}, ml_model=None,
                   initcond_sampler=None, initcond_sampler_kwargs={},
                   maximum_propagation_time=1000, time_step=0.1,
                   ensemble='NVE', thermostat=None,
                   dump_trajs=False, dump_trajectory_interval=None,
                   stop_function=None, batch_parallelization=True):
        moldb2label = ml.data.molecular_database()

        # generate initial conditions
        if type(initcond_sampler) == str:
            if initcond_sampler.casefold() in ['wigner', 'harmonic-quantum-boltzmann']:
                initcond_sampler_kwargs['generation_method'] = initcond_sampler
                initcond_sampler = ml.generate_initial_conditions
        import inspect
        args = inspect.getfullargspec(initcond_sampler).args  # getargspec was removed in Python 3.11
        if 'al_info' in args:
            initial_molecular_database = initcond_sampler(al_info=al_info, **initcond_sampler_kwargs)
        else:
            initial_molecular_database = initcond_sampler(**initcond_sampler_kwargs)

        # run MD in parallel to collect uncertain points
        if batch_parallelization:
            # faster way to propagate many trajectories with ML
            dyn = ml.md_parallel(model=ml_model,
                                 molecular_database=initial_molecular_database,
                                 ensemble=ensemble,
                                 thermostat=thermostat,
                                 time_step=time_step,
                                 maximum_propagation_time=maximum_propagation_time,
                                 dump_trajectory_interval=dump_trajectory_interval,
                                 stop_function=stop_function)
            trajs = dyn.molecular_trajectory
            for itraj in range(len(trajs.steps[0])):
                print(f"Trajectory {itraj} number of steps: {trajs.traj_len[itraj]}")
                # the last step of an uncertain trajectory is collected for labeling
                if trajs.steps[trajs.traj_len[itraj]][itraj].uncertain:
                    print(f'Adding molecule from trajectory {itraj} at time {trajs.traj_len[itraj]*time_step} fs')
                    moldb2label.molecules.append(trajs.steps[trajs.traj_len[itraj]][itraj])
                # dump the trajectory
                if dump_trajs:
                    import os
                    traj = ml.data.molecular_trajectory()
                    for istep in range(trajs.traj_len[itraj]+1):
                        step = ml.data.molecular_trajectory_step()
                        step.step = istep
                        step.time = istep * time_step
                        step.molecule = trajs.steps[istep][itraj]
                        traj.steps.append(step)
                    if 'working_directory' in al_info.keys():
                        dirname = f"{al_info['working_directory']}/trajs"
                    else:
                        dirname = 'trajs'
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    traj.dump(f"{dirname}/traj{itraj}.h5", format='h5md')
        else:
            md_kwargs = {
                'molecular_database': initial_molecular_database,
                'model': ml_model,
                'time_step': time_step,
                'maximum_propagation_time': maximum_propagation_time,
                'ensemble': ensemble,
                'thermostat': thermostat,
                'dump_trajectory_interval': dump_trajectory_interval,
                'stop_function': stop_function
            }
            dyns = ml.simulations.run_in_parallel(molecular_database=initial_molecular_database,
                                                  task=ml.md,
                                                  task_kwargs=md_kwargs,
                                                  create_and_keep_temp_directories=False)
            trajs = [d.molecular_trajectory for d in dyns]
            itraj = 0
            for traj in trajs:
                itraj += 1
                print(f"Trajectory {itraj} number of steps: {len(traj.steps)}")
                if traj.steps[-1].molecule.uncertain:
                    print(f'Adding molecule from trajectory {itraj} at time {traj.steps[-1].time:.2f} fs')
                    moldb2label.molecules.append(traj.steps[-1].molecule)
                # dump the trajectory
                if dump_trajs:
                    import os
                    if 'working_directory' in al_info.keys():
                        dirname = f"{al_info['working_directory']}/trajs"
                    else:
                        dirname = 'trajs'
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    traj.dump(f"{dirname}/traj{itraj}.h5", format='h5md')

        # add the source of the molecules
        for mol in moldb2label:
            mol.sampling = 'md'
        return moldb2label

    ml.al(
        ...
        sampler=my_sampler,
        sampler_kwargs={'time_step': 0.5},
        ...
    )

Analysis
========

.. automodule:: mlatom.xyz
    :members: