addition of particle guidance

gcorso · gcorso · commit 5f713b42d700 · 2023-10-10T16:19:20.000-04:00
diff --git a/README.md b/README.md
@@ -67,13 +67,44 @@ Then to test it:
 
     python test_boltzmann.py --model_dir workdir/boltz_T500 --temp 500 --model_steps 20 --original_model_dir /workdir/drugs_seed_boltz/ --out boltzmann.out
 
+
+## Particle Guidance sampling
+
+In [this manuscript]() we propose particle guidance, a new method for jointly sampling a set of particles with diffusion models. We demonstrate that, for the task of molecular conformer generation, it provides significant improvements in precision and recall compared to standard I.I.D. diffusion sampling. To run particle guidance sampling with torsional diffusion and replicate the results of the paper, use the commands below (you can run them on your own molecules in the same way).
+
+For the permutation invariant kernel guidance (higher quality, slower):
+
+    # minimizing recall error
+    python generate_confs.py --tqdm --batch_size 128 --no_energy --inference_steps=20 --model_dir=workdir/drugs_default --test_csv=data/DRUGS/test_smiles.csv --pg_invariant=True --pg_kernel_size_log_0=1.7565691770646286 --pg_kernel_size_log_1=1.1960868735428605 --pg_langevin_weight_log_0=-2.2245183818892103 --pg_langevin_weight_log_1=-2.403905082248579 --pg_repulsive_weight_log_0=-2.158537381110402 --pg_repulsive_weight_log_1=-2.717482077162461 --pg_weight_log_0=0.8004013644746992 --pg_weight_log_1=-0.9255658381081596
+    # minimizing precision error
+    python generate_confs.py --tqdm --batch_size 128 --no_energy --inference_steps=20 --model_dir=workdir/drugs_default --test_csv=data/DRUGS/test_smiles.csv --pg_invariant=True --pg_kernel_size_log_0=-0.9686202580381296 --pg_kernel_size_log_1=-0.7808409291022302 --pg_langevin_weight_log_0=-2.434216242826782 --pg_langevin_weight_log_1=-0.2602238633333869 --pg_repulsive_weight_log_0=-2.0439285313973237 --pg_repulsive_weight_log_1=-1.468234554877924 --pg_weight_log_0=0.3495680598729498 --pg_weight_log_1=-0.22001939454654185
+
+
+For the non-permutation invariant kernel guidance (faster, slightly lower quality, but still better than I.I.D.):
+
+    # minimizing recall error
+    python generate_confs.py --tqdm --batch_size 128 --no_energy --inference_steps=20 --model_dir=workdir/drugs_default --test_csv=data/DRUGS/test_smiles.csv --pg_kernel_size_log_0=2.35958 --pg_kernel_size_log_1=-0.78826 --pg_langevin_weight_log_0=-1.55054 --pg_langevin_weight_log_1=-2.70316 --pg_repulsive_weight_log_0=1.01317 --pg_repulsive_weight_log_1=-2.68407 --pg_weight_log_0=0.60504 --pg_weight_log_1=-1.15020
+    # minimizing precision error
+    python generate_confs.py --tqdm --batch_size 128 --no_energy --inference_steps=20 --model_dir=workdir/drugs_default --test_csv=data/DRUGS/test_smiles.csv --pg_kernel_size_log_0=1.29503 --pg_kernel_size_log_1=1.45944 --pg_langevin_weight_log_0=-2.88867 --pg_langevin_weight_log_1=-2.47591 --pg_repulsive_weight_log_0=-1.01222 --pg_repulsive_weight_log_1=-1.91253 --pg_weight_log_0=-0.16253 --pg_weight_log_1=0.79355
+
 ## Citation
+
+If you use this code, please cite:
+
     @article{jing2022torsional,
           title={Torsional Diffusion for Molecular Conformer Generation}, 
           author={Bowen Jing and Gabriele Corso and Jeffrey Chang and Regina Barzilay and Tommi Jaakkola},
           journal={arXiv preprint arXiv:2206.01729},
           year={2022}
     }
 
+If you employ the particle guidance sampling technique, please also cite:
+
+    @article{corso2023particle,
+          title={Particle Guidance: non-I.I.D. Diverse Sampling with Diffusion Models}, 
+          author={Gabriele Corso and Yilun Xu and Valentin de Bortoli and Regina Barzilay and Tommi Jaakkola},
+          year={2023}
+    }
+
 ## License
 MIT
diff --git a/diffusion/sampling.py b/diffusion/sampling.py
@@ -9,7 +9,10 @@
 from torch_geometric.loader import DataLoader
 from rdkit import Chem, Geometry
 from rdkit.Chem import AllChem
+
+from utils.utils import time_limit, TimeoutException
 from utils.visualise import PDBFile
+from spyrmsd import molecule, graph
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 still_frames = 10
@@ -91,13 +94,65 @@ def perturb_seeds(data, pdb=None):
 
 
 def sample(conformers, model, sigma_max=np.pi, sigma_min=0.01 * np.pi, steps=20, batch_size=32,
-           ode=False, likelihood=None, pdb=None):
+           ode=False, likelihood=None, pdb=None, pg_weight_log_0=None, pg_repulsive_weight_log_0=None,
+           pg_weight_log_1=None, pg_repulsive_weight_log_1=None, pg_kernel_size_log_0=None,
+           pg_kernel_size_log_1=None, pg_langevin_weight_log_0=None, pg_langevin_weight_log_1=None,
+           pg_invariant=False, mol=None):
+
     conf_dataset = InferenceDataset(conformers)
     loader = DataLoader(conf_dataset, batch_size=batch_size, shuffle=False)
 
     sigma_schedule = 10 ** np.linspace(np.log10(sigma_max), np.log10(sigma_min), steps + 1)[:-1]
     eps = 1 / steps
 
+    if pg_weight_log_0 is not None and pg_weight_log_1 is not None:
+        edge_index, edge_mask = conformers[0].edge_index, conformers[0].edge_mask
+        edge_list = [[] for _ in range(torch.max(edge_index) + 1)]
+
+        for p in edge_index.T:
+            edge_list[p[0]].append(p[1])
+
+        rot_bonds = [(p[0], p[1]) for i, p in enumerate(edge_index.T) if edge_mask[i]]
+
+        dihedral = []
+        for a, b in rot_bonds:
+            c = edge_list[a][0] if edge_list[a][0] != b else edge_list[a][1]
+            d = edge_list[b][0] if edge_list[b][0] != a else edge_list[b][1]
+            dihedral.append((c.item(), a.item(), b.item(), d.item()))
+        dihedral_numpy = np.asarray(dihedral)
+        dihedral = torch.tensor(dihedral)
+
+        if pg_invariant:
+            try:
+                with time_limit(10):
+                    mol = molecule.Molecule.from_rdkit(mol)
+
+                    aprops = mol.atomicnums
+                    am = mol.adjacency_matrix
+
+                    # Convert molecules to graphs
+                    G = graph.graph_from_adjacency_matrix(am, aprops)
+
+                    # Get all the possible graph isomorphisms
+                    isomorphisms = graph.match_graphs(G, G)
+                    isomorphisms = [iso[0] for iso in isomorphisms]
+                    isomorphisms = np.asarray(isomorphisms)
+
+                    # filter out those having an effect on the dihedrals
+                    dih_iso = isomorphisms[:, dihedral_numpy]
+                    dih_iso = np.unique(dih_iso, axis=0)
+
+                    if len(dih_iso) > 32:
+                        print("reduce isomorphisms from", len(dih_iso), "to", 32)
+                        dih_iso = dih_iso[np.random.choice(len(dih_iso), replace=False, size=32)]
+                    else:
+                        print("isomorphisms", len(dih_iso))
+                    dih_iso = torch.from_numpy(dih_iso).to(device)
+
+            except TimeoutException as e:
+                print("Timeout generating with non invariant kernel")
+                pg_invariant = False
+
     for batch_idx, data in enumerate(loader):
 
         dlogp = torch.zeros(data.num_graphs)
@@ -112,6 +167,10 @@ def sample(conformers, model, sigma_max=np.pi, sigma_min=0.01 * np.pi, steps=20,
             z = torch.normal(mean=0, std=1, size=data_gpu.edge_pred.shape)
             score = data_gpu.edge_pred.cpu()
 
+            t = sigma_idx / steps   # t is really 1-t
+            pg_weight = 10**(pg_weight_log_0 * t + pg_weight_log_1 * (1 - t)) if pg_weight_log_0 is not None and pg_weight_log_1 is not None else 0.0
+            pg_repulsive_weight = 10**(pg_repulsive_weight_log_0 * t + pg_repulsive_weight_log_1 * (1 - t)) if pg_repulsive_weight_log_0 is not None and pg_repulsive_weight_log_1 is not None else 1.0
+
             if ode:
                 perturb = 0.5 * g ** 2 * eps * score
                 if likelihood:
@@ -120,6 +179,34 @@ def sample(conformers, model, sigma_max=np.pi, sigma_min=0.01 * np.pi, steps=20,
             else:
                 perturb = g ** 2 * eps * score + g * np.sqrt(eps) * z
 
+            if pg_weight > 0:
+                n = data.num_graphs
+                if pg_invariant:
+                    S, D, _ = dih_iso.shape
+                    dih_iso_cat = dih_iso.reshape(-1, 4)
+                    tau = get_torsion_angles(dih_iso_cat, data_gpu.pos, n)
+                    tau_diff = tau.unsqueeze(1) - tau.unsqueeze(0)
+                    tau_diff = torch.fmod(tau_diff + 3 * np.pi, 2 * np.pi) - np.pi
+                    tau_diff = tau_diff.reshape(n, n, S, D)
+                    tau_matrix = torch.sum(tau_diff ** 2, dim=-1, keepdim=True)
+                    tau_matrix, indices = torch.min(tau_matrix, dim=2)
+                    tau_diff = torch.gather(tau_diff, 2, indices.unsqueeze(-1).repeat(1, 1, 1, D)).squeeze(2)
+                else:
+                    tau = get_torsion_angles(dihedral, data_gpu.pos, n)
+                    tau_diff = tau.unsqueeze(1) - tau.unsqueeze(0)
+                    tau_diff = torch.fmod(tau_diff+3*np.pi, 2*np.pi)-np.pi
+                    assert torch.all(tau_diff < np.pi + 0.1) and torch.all(tau_diff > -np.pi - 0.1), tau_diff
+                    tau_matrix = torch.sum(tau_diff**2, dim=-1, keepdim=True)
+
+                kernel_size = 10 ** (pg_kernel_size_log_0 * t + pg_kernel_size_log_1 * (1 - t)) if pg_kernel_size_log_0 is not None and pg_kernel_size_log_1 is not None else 1.0
+                langevin_weight = 10 ** (pg_langevin_weight_log_0 * t + pg_langevin_weight_log_1 * (1 - t)) if pg_langevin_weight_log_0 is not None and pg_langevin_weight_log_1 is not None else 1.0
+
+                k = torch.exp(-1 / kernel_size * tau_matrix)
+                repulsive = torch.sum(2/kernel_size*tau_diff*k, dim=1).cpu().reshape(-1) / n
+
+                perturb = (0.5 * g ** 2 * eps * score) + langevin_weight * (0.5 * g ** 2 * eps * score + g * np.sqrt(eps) * z)
+                perturb += pg_weight * (g ** 2 * eps * (score + pg_repulsive_weight * repulsive))
+
             conf_dataset.apply_torsion_and_update_pos(data, perturb.numpy())
             data_gpu.pos = data.pos.to(device)
 
diff --git a/generate_confs.py b/generate_confs.py
@@ -34,6 +34,16 @@
 parser.add_argument('--batch_size', type=int, default=32, help='Number of conformers generated in parallel')
 parser.add_argument('--xtb', type=str, default=None, help='If set, it indicates path to local xtb main directory')
 parser.add_argument('--no_energy', action='store_true', default=False, help='If set skips computation of likelihood, energy etc')
+
+def _str_to_bool(value):
+    """Parse a command-line boolean such as ``True`` / ``False`` (case-insensitive)."""
+    normalized = str(value).strip().lower()
+    if normalized in ('true', 't', 'yes', 'y', '1'):
+        return True
+    if normalized in ('false', 'f', 'no', 'n', '0'):
+        return False
+    # argparse reports a ValueError raised by a `type` callable as a usage error
+    raise ValueError('expected a boolean, got %r' % (value,))
+
+
+# Particle guidance options. Each `*_log_0` / `*_log_1` pair holds the two endpoints of a
+# schedule that is interpolated linearly in log10 space across the sampling steps.
+# Guidance is only active when both --pg_weight_log_0 and --pg_weight_log_1 are given.
+parser.add_argument('--pg_weight_log_0', type=float, default=None, help='log10 of the particle guidance weight (schedule endpoint 0); guidance is off unless both pg_weight_log values are set')
+parser.add_argument('--pg_weight_log_1', type=float, default=None, help='log10 of the particle guidance weight (schedule endpoint 1)')
+parser.add_argument('--pg_repulsive_weight_log_0', type=float, default=None, help='log10 of the weight of the repulsive kernel term (schedule endpoint 0); the weight is 1 if unset')
+parser.add_argument('--pg_repulsive_weight_log_1', type=float, default=None, help='log10 of the weight of the repulsive kernel term (schedule endpoint 1)')
+parser.add_argument('--pg_langevin_weight_log_0', type=float, default=None, help='log10 of the weight of the Langevin term (schedule endpoint 0); the weight is 1 if unset')
+parser.add_argument('--pg_langevin_weight_log_1', type=float, default=None, help='log10 of the weight of the Langevin term (schedule endpoint 1)')
+parser.add_argument('--pg_kernel_size_log_0', type=float, default=None, help='log10 of the kernel size (schedule endpoint 0); the size is 1 if unset')
+parser.add_argument('--pg_kernel_size_log_1', type=float, default=None, help='log10 of the kernel size (schedule endpoint 1)')
+# `type=bool` would turn every non-empty string, including "False", into True
+parser.add_argument('--pg_invariant', type=_str_to_bool, default=False, help='If True, use the permutation invariant kernel (higher quality, slower)')
 args = parser.parse_args()
 
 """
@@ -113,7 +123,15 @@ def sample_confs(raw_smi, n_confs, smi):
 
     if not args.no_model and n_rotable_bonds > 0.5:
         conformers = sample(conformers, model, args.sigma_max, args.sigma_min, args.inference_steps,
-                            args.batch_size, args.ode, args.likelihood, pdb)
+                            args.batch_size, args.ode, args.likelihood, pdb,
+                            pg_weight_log_0=args.pg_weight_log_0, pg_weight_log_1=args.pg_weight_log_1,
+                            pg_repulsive_weight_log_0=args.pg_repulsive_weight_log_0,
+                            pg_repulsive_weight_log_1=args.pg_repulsive_weight_log_1,
+                            pg_kernel_size_log_0=args.pg_kernel_size_log_0,
+                            pg_kernel_size_log_1=args.pg_kernel_size_log_1,
+                            pg_langevin_weight_log_0=args.pg_langevin_weight_log_0,
+                            pg_langevin_weight_log_1=args.pg_langevin_weight_log_1,
+                            pg_invariant=args.pg_invariant, mol=mol)
 
     if args.dump_pymol:
         if not osp.isdir(args.dump_pymol):
diff --git a/utils/torsion.py b/utils/torsion.py
@@ -100,4 +100,25 @@ def perturb_batch(data, torsion_updates, split=False, return_updates=False):
         idx_edges += mask_rotate.shape[0]
     if return_updates:
         return pos_new, torsion_update_list
-    return pos_new
+    return pos_new
+
+
+def bdot(a, b):
+    """Batched dot product: sum ``a * b`` over the last axis, keeping that axis with size 1."""
+    elementwise = a * b
+    return elementwise.sum(dim=-1, keepdim=True)
+
+
+def get_torsion_angles(dihedral, batch_pos, batch_size):
+    """Compute signed torsion angles for a batch of conformers.
+
+    Args:
+        dihedral: integer index tensor whose rows are ``(c, a, b, d)`` atom
+            indices; ``a``-``b`` is the rotation axis and ``c`` / ``d`` are the
+            reference atoms on either side of it.
+        batch_pos: atom coordinates of all conformers; reshaped here to
+            ``(batch_size, -1, 3)``, so every conformer must have the same
+            number of atoms.
+        batch_size: number of conformers stacked in ``batch_pos``.
+
+    Returns:
+        Tensor of shape ``(batch_size, dihedral.shape[0])`` holding the angles
+        in radians, inside ``(-pi, pi)``.
+    """
+    batch_pos = batch_pos.reshape(batch_size, -1, 3)
+
+    c, a, b, d = dihedral[:, 0], dihedral[:, 1], dihedral[:, 2], dihedral[:, 3]
+    pos_a, pos_c, pos_d = batch_pos[:, a], batch_pos[:, c], batch_pos[:, d]
+    axis = batch_pos[:, b] - pos_a
+    axis_sq_norm = bdot(axis, axis)
+
+    # Project c and d onto the a-b axis, then slide d along the axis so that both
+    # reference vectors start from the same point and lie in the plane normal to it.
+    c_project_ab = pos_a + bdot(pos_c - pos_a, axis) / axis_sq_norm * axis
+    d_project_ab = pos_a + bdot(pos_d - pos_a, axis) / axis_sq_norm * axis
+    dshifted = pos_d - d_project_ab + c_project_ab
+    d_vec = dshifted - c_project_ab
+    c_vec = pos_c - c_project_ab
+
+    cos = bdot(d_vec, c_vec) / (
+        torch.norm(d_vec, dim=-1, keepdim=True) * torch.norm(c_vec, dim=-1, keepdim=True))
+    # Keep acos away from +-1, where its gradient is unbounded
+    cos = torch.clamp(cos, -1 + 1e-5, 1 - 1e-5)
+    angle = torch.acos(cos)
+    # dim=-1 is explicit: without it torch.cross picks the first axis of size 3,
+    # which is wrong whenever the batch size or the number of dihedrals equals 3.
+    sign = torch.sign(bdot(torch.cross(d_vec, c_vec, dim=-1), axis))
+    torsion_angles = (angle * sign).squeeze(-1)
+    return torsion_angles
diff --git a/utils/utils.py b/utils/utils.py
@@ -54,3 +54,20 @@ def start(self, tag):
     def end(self, tag):
         self.times[tag] += time.time() - self.starts[tag]
         del self.starts[tag]
+
+
+import signal
+from contextlib import contextmanager
+class TimeoutException(Exception):
+    """Raised inside a ``time_limit`` block when the allotted time runs out."""
+
+@contextmanager
+def time_limit(seconds):
+    """Context manager that aborts its body with ``TimeoutException`` after ``seconds``.
+
+    Built on ``signal.alarm`` / ``SIGALRM``, so ``seconds`` must be an integer and the
+    manager only works where that signal exists and handlers may be installed
+    (Unix, main thread). The SIGALRM handler that was active on entry is put back
+    on exit, so the timeout handler does not leak into the rest of the program.
+
+    Args:
+        seconds: whole number of seconds the body is allowed to run.
+
+    Raises:
+        TimeoutException: raised inside the body when the alarm fires.
+    """
+    def signal_handler(signum, frame):
+        raise TimeoutException("Timed out!")
+
+    previous_handler = signal.signal(signal.SIGALRM, signal_handler)
+    signal.alarm(seconds)
+    try:
+        yield
+    finally:
+        # Cancel the pending alarm first so it cannot fire during cleanup
+        signal.alarm(0)
+        # signal.signal() returns None for handlers not installed from Python;
+        # those cannot be passed back in, so leave the handler untouched then.
+        if previous_handler is not None:
+            signal.signal(signal.SIGALRM, previous_handler)