diff --git a/examples/creating-datasets.ipynb b/examples/creating-datasets.ipynb index 7d8bb4d3..33da3b40 100644 --- a/examples/creating-datasets.ipynb +++ b/examples/creating-datasets.ipynb @@ -21,6 +21,35 @@ "cell_type": "code", "execution_count": 1, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:48.077646Z", + "iopub.status.busy": "2024-01-30T16:30:48.077036Z", + "iopub.status.idle": "2024-01-30T16:30:49.133772Z", + "shell.execute_reply": "2024-01-30T16:30:49.133329Z" + } + }, + "outputs": [], + "source": [ + "try:\n", + " import qcportal\n", + " from openeye import oechem\n", + " import qcportal\n", + "except:\n", + " pass\n", + "\n", + "import qcportal" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:49.136777Z", + "iopub.status.busy": "2024-01-30T16:30:49.136484Z", + "iopub.status.idle": "2024-01-30T16:30:49.139231Z", + "shell.execute_reply": "2024-01-30T16:30:49.138777Z" + }, "pycharm": { "name": "#%%\n" } @@ -50,8 +79,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:49.141727Z", + "iopub.status.busy": "2024-01-30T16:30:49.141424Z", + "iopub.status.idle": "2024-01-30T16:30:50.705928Z", + "shell.execute_reply": "2024-01-30T16:30:50.705260Z" + }, "pycharm": { "name": "#%% \n" } @@ -63,7 +98,7 @@ "BasicDatasetFactory(qc_specifications={'default': QCSpec(method='B3LYP-D3BJ', basis='DZVP', program='psi4', spec_name='default', spec_description='Standard OpenFF optimization quantum chemistry specification.', store_wavefunction=, implicit_solvent=None, maxiter=200, scf_properties=[, , , ], keywords={}), 'ani1ccx': QCSpec(method='ani1ccx', basis=None, program='torchani', spec_name='ani1ccx', spec_description='ANI1ccx standard specification', store_wavefunction=, implicit_solvent=None, maxiter=200, scf_properties=[, , , ], keywords={})}, driver=, priority='normal', dataset_tags=['openff'], compute_tag='openff', type='BasicDatasetFactory', workflow=[])" ] }, - "execution_count": 1, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -115,8 +150,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:50.741672Z", + "iopub.status.busy": "2024-01-30T16:30:50.741345Z", + "iopub.status.idle": "2024-01-30T16:30:50.745657Z", + "shell.execute_reply": "2024-01-30T16:30:50.745022Z" + }, "pycharm": { "name": "#%%\n" } @@ -128,7 +169,7 @@ "[]" ] }, - "execution_count": 2, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -157,8 +198,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:50.748999Z", + "iopub.status.busy": "2024-01-30T16:30:50.748550Z", + "iopub.status.idle": "2024-01-30T16:30:50.751665Z", + "shell.execute_reply": "2024-01-30T16:30:50.751134Z" + }, "pycharm": { "name": "#%%\n" } @@ -198,8 +245,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:50.754025Z", + "iopub.status.busy": "2024-01-30T16:30:50.753859Z", + "iopub.status.idle": "2024-01-30T16:30:50.756926Z", + "shell.execute_reply": "2024-01-30T16:30:50.756318Z" + }, "pycharm": { "name": "#%%\n" } @@ -232,8 +285,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:50.759540Z", + "iopub.status.busy": "2024-01-30T16:30:50.759298Z", + "iopub.status.idle": "2024-01-30T16:30:50.763020Z", + "shell.execute_reply": "2024-01-30T16:30:50.762310Z" + }, "pycharm": { "name": "#%%\n" } @@ -247,7 +306,7 @@ " StandardConformerGenerator(type='StandardConformerGenerator', rms_cutoff=None, max_conformers=1, clear_existing=True)]" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -269,8 +328,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:50.766252Z", + "iopub.status.busy": "2024-01-30T16:30:50.765891Z", + "iopub.status.idle": "2024-01-30T16:30:50.773571Z", + "shell.execute_reply": "2024-01-30T16:30:50.773157Z" + }, "pycharm": { "name": "#%%\n" } @@ -294,8 +359,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:50.775858Z", + "iopub.status.busy": "2024-01-30T16:30:50.775673Z", + "iopub.status.idle": "2024-01-30T16:30:50.916582Z", + "shell.execute_reply": "2024-01-30T16:30:50.915926Z" + }, "pycharm": { "name": "#%%\n" } @@ -345,8 +416,15 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, + "execution_count": 10, + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:50.919654Z", + "iopub.status.busy": "2024-01-30T16:30:50.919396Z", + "iopub.status.idle": "2024-01-30T16:30:50.926695Z", + "shell.execute_reply": "2024-01-30T16:30:50.925553Z" + } + }, "outputs": [], "source": [ "imported_factory = BasicDatasetFactory.from_file(\"example-factory.json\")" @@ -363,8 +441,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:50.930138Z", + "iopub.status.busy": "2024-01-30T16:30:50.929934Z", + "iopub.status.idle": "2024-01-30T16:30:51.273263Z", + "shell.execute_reply": "2024-01-30T16:30:51.272677Z" + }, "scrolled": true }, "outputs": [], @@ -411,8 +495,14 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:30:51.276211Z", + "iopub.status.busy": "2024-01-30T16:30:51.275948Z", + "iopub.status.idle": "2024-01-30T16:31:01.493203Z", + "shell.execute_reply": "2024-01-30T16:31:01.492685Z" + }, "scrolled": true }, "outputs": [ @@ -420,20 +510,148 @@ "name": "stderr", "output_type": "stream", "text": [ - "Deduplication : 100%|██████████| 20/20 [00:00<00:00, 465.41it/s]\n", - "ElementFilter : 100%|███████████| 20/20 [00:04<00:00, 4.51it/s]\n", - "MolecularWeightFilter : 100%|███████████| 19/19 [00:03<00:00, 5.00it/s]\n", - "StandardConformerGenerator : 100%|█████████████| 2/2 [00:02<00:00, 1.22s/it]\n", - "Preparation : 100%|█████████████| 2/2 [00:00<00:00, 48.09it/s]\n" + "\r", + "Deduplication : 0%| | 0/20 [00:00, implicit_solvent=None, maxiter=200, scf_properties=[, , , ], keywords={}), 'ani1ccx': QCSpec(method='ani1ccx', basis=None, program='torchani', spec_name='ani1ccx', spec_description='ANI1ccx standard specification', store_wavefunction=, implicit_solvent=None, maxiter=200, scf_properties=[, , , ], keywords={})}, driver=, priority='normal', dataset_tags=['openff'], compute_tag='openff', dataset_name='example-dataset', dataset_tagline='An example dataset.', type='DataSet', description='An example dataset.', metadata=Metadata(submitter='jeffreywagner', creation_date=datetime.date(2023, 10, 31), collection_type='DataSet', dataset_name='example-dataset', short_description='An example dataset.', long_description_url=None, long_description='An example dataset.', elements={'N', 'C', 'H', 'O'}), provenance={'openff-qcsubmit': '0.5.0+108.g3489932.dirty', 'openff-toolkit': '0.13.1', 'OpenEyeToolkitWrapper': '2023.1.1', 'RDKitToolkitWrapper': '2023.03.1', 'AmberToolsToolkitWrapper': '22.0'}, dataset={'c1ccc(cc1)/N=C\\\\NO': DatasetEntry(index='c1ccc(cc1)/N=C\\\\NO', initial_molecules=[Molecule(name='C7H8N2O', formula='C7H8N2O', hash='dcb3621')], attributes=MoleculeAttributes(canonical_smiles='c1ccc(cc1)N=CNO', canonical_isomeric_smiles='c1ccc(cc1)/N=C\\\\NO', canonical_explicit_hydrogen_smiles='[H]c1c(c(c(c(c1[H])[H])N=C([H])N([H])O[H])[H])[H]', canonical_isomeric_explicit_hydrogen_smiles='[H]c1c(c(c(c(c1[H])[H])/N=C(/[H])\\\\N([H])O[H])[H])[H]', canonical_isomeric_explicit_hydrogen_mapped_smiles='[H:11][c:1]1[c:2]([c:3]([c:4]([c:5]([c:6]1[H:15])[H:14])/[N:7]=[C:8](/[H:16])\\\\[N:9]([H:17])[O:10][H:18])[H:13])[H:12]', molecular_formula='C7H8N2O', standard_inchi='InChI=1S/C7H8N2O/c10-9-6-8-7-4-2-1-3-5-7/h1-6,10H,(H,8,9)', inchi_key='FEUZPLBUEYBLTN-UHFFFAOYSA-N', fixed_hydrogen_inchi='InChI=1/C7H8N2O/c10-9-6-8-7-4-2-1-3-5-7/h1-6,10H,(H,8,9)/f/h9H/b8-6-', fixed_hydrogen_inchi_key='FEUZPLBUEYBLTN-NAFDMULTNA-N', unique_fixed_hydrogen_inchi_keys={'FEUZPLBUEYBLTN-NAFDMULTNA-N'}), extras={'canonical_isomeric_explicit_hydrogen_mapped_smiles': '[H:11][c:1]1[c:2]([c:3]([c:4]([c:5]([c:6]1[H:15])[H:14])/[N:7]=[C:8](/[H:16])\\\\[N:9]([H:17])[O:10][H:18])[H:13])[H:12]'}, keywords={}), 'C1=COC(=C1)C2=CC=CO2': DatasetEntry(index='C1=COC(=C1)C2=CC=CO2', initial_molecules=[Molecule(name='C8H6O2', formula='C8H6O2', hash='3ebf2bb')], attributes=MoleculeAttributes(canonical_smiles='C1=COC(=C1)C2=CC=CO2', canonical_isomeric_smiles='C1=COC(=C1)C2=CC=CO2', canonical_explicit_hydrogen_smiles='[H]C1=C(OC(=C1[H])C2=C(C(=C(O2)[H])[H])[H])[H]', canonical_isomeric_explicit_hydrogen_smiles='[H]C1=C(OC(=C1[H])C2=C(C(=C(O2)[H])[H])[H])[H]', canonical_isomeric_explicit_hydrogen_mapped_smiles='[H:11][C:1]1=[C:5]([O:4][C:3](=[C:2]1[H:12])[C:6]2=[C:7]([C:8](=[C:9]([O:10]2)[H:16])[H:15])[H:14])[H:13]', molecular_formula='C8H6O2', standard_inchi='InChI=1S/C8H6O2/c1-3-7(9-5-1)8-4-2-6-10-8/h1-6H', inchi_key='UDHZFLBMZZVHRA-UHFFFAOYSA-N', fixed_hydrogen_inchi='InChI=1/C8H6O2/c1-3-7(9-5-1)8-4-2-6-10-8/h1-6H', fixed_hydrogen_inchi_key='UDHZFLBMZZVHRA-UHFFFAOYNA-N', unique_fixed_hydrogen_inchi_keys={'UDHZFLBMZZVHRA-UHFFFAOYNA-N'}), extras={'canonical_isomeric_explicit_hydrogen_mapped_smiles': '[H:11][C:1]1=[C:5]([O:4][C:3](=[C:2]1[H:12])[C:6]2=[C:7]([C:8](=[C:9]([O:10]2)[H:16])[H:15])[H:14])[H:13]'}, keywords={})}, filtered_molecules={'ElementFilter': FilterEntry(component='ElementFilter', component_settings={'type': 'ElementFilter', 'allowed_elements': [1, 6, 7, 8]}, component_provenance={'openff-toolkit': '0.13.1', 'openff-qcsubmit': '0.5.0+108.g3489932.dirty', 'OpenEyeToolkitWrapper': '2023.1.1', 'RDKitToolkitWrapper': '2023.03.1', 'AmberToolsToolkitWrapper': '22.0', 'openmm_elements': '8.0'}, molecules=['[H]C([H])([H])C1=NC(=NC1=[N+]=[N-])Cl']), 'MolecularWeightFilter': FilterEntry(component='MolecularWeightFilter', component_settings={'type': 'MolecularWeightFilter', 'minimum_weight': 130, 'maximum_weight': 781}, component_provenance={'openff-toolkit': '0.13.1', 'openff-qcsubmit': '0.5.0+108.g3489932.dirty', 'OpenEyeToolkitWrapper': '2023.1.1', 'RDKitToolkitWrapper': '2023.03.1', 'AmberToolsToolkitWrapper': '22.0'}, molecules=['[H]/N=C(/N([H])[H])\\\\N([H])C1=NN=NN1[H]', '[H]c1c(c([n+](c(c1[H])[H])[H])[H])[H]', '[H]C([H])([H])[N+](C([H])([H])[H])(C([H])([H])[H])[O-]', '[H]C([H])([H])ON([H])C(=O)N([H])[H]', '[H]c1c(c(c2c(c1[H])C(=C(N2[H])[H])[H])[H])[H]', '[H]C(=C([H])O[H])[H]', '[H]C1=C(OC(=C1[O-])[H])[H]', '[H]C([H])([H])C(=O)N([H])O[H]', '[H]C(=[N+](C([H])([H])[H])C([H])([H])[H])[H]', '[H]C(=O)C(=O)[H]', '[H]C(=C([H])[H])[H]', '[H]c1c(c([n+](c(c1[H])[H])[O-])[H])[H]', '[H]C([H])([H])N(C([H])([H])[H])O[H]', '[H]ON(=O)=O', '[H]C(=O)C([H])([H])[H]', '[H]C([H])([H])C([H])([H])[H]', '[H]C1=C(C(C(=C(C1=O)[H])[H])([H])[H])[H]']), 'StandardConformerGenerator': FilterEntry(component='StandardConformerGenerator', component_settings={'type': 'StandardConformerGenerator', 'rms_cutoff': None, 'max_conformers': 1, 'clear_existing': True}, component_provenance={'openff-toolkit': '0.13.1', 'openff-qcsubmit': '0.5.0+108.g3489932.dirty', 'OpenEyeToolkitWrapper': '2023.1.1', 'RDKitToolkitWrapper': '2023.03.1', 'AmberToolsToolkitWrapper': '22.0'}, molecules=[])})" + "BasicDataset(qc_specifications={'default': QCSpec(method='B3LYP-D3BJ', basis='DZVP', program='psi4', spec_name='default', spec_description='Standard OpenFF optimization quantum chemistry specification.', store_wavefunction=, implicit_solvent=None, maxiter=200, scf_properties=[, , , ], keywords={}), 'ani1ccx': QCSpec(method='ani1ccx', basis=None, program='torchani', spec_name='ani1ccx', spec_description='ANI1ccx standard specification', store_wavefunction=, implicit_solvent=None, maxiter=200, scf_properties=[, , , ], keywords={})}, driver=, priority='normal', dataset_tags=['openff'], compute_tag='openff', dataset_name='example-dataset', dataset_tagline='An example dataset.', type='DataSet', description='An example dataset.', metadata=Metadata(submitter='mattthompson', creation_date=datetime.date(2024, 1, 30), collection_type='DataSet', dataset_name='example-dataset', short_description='An example dataset.', long_description_url=None, long_description='An example dataset.', elements={'C', 'N', 'H', 'O'}), provenance={'openff-qcsubmit': '0.50.2+0.g2fa465a.dirty', 'openff-toolkit': '0.15.0', 'RDKitToolkitWrapper': '2023.09.4', 'AmberToolsToolkitWrapper': '22.0'}, dataset={'ON/C=N\\\\c1ccccc1': DatasetEntry(index='ON/C=N\\\\c1ccccc1', initial_molecules=[Molecule(name='C7H8N2O', formula='C7H8N2O', hash='3c416ab')], attributes=MoleculeAttributes(canonical_smiles='ONC=Nc1ccccc1', canonical_isomeric_smiles='ON/C=N\\\\c1ccccc1', canonical_explicit_hydrogen_smiles='[H][O][N]([H])[C]([H])=[N][c]1[c]([H])[c]([H])[c]([H])[c]([H])[c]1[H]', canonical_isomeric_explicit_hydrogen_smiles='[H][O][N]([H])/[C]([H])=[N]\\\\[c]1[c]([H])[c]([H])[c]([H])[c]([H])[c]1[H]', canonical_isomeric_explicit_hydrogen_mapped_smiles='[c:1]1([H:11])[c:2]([H:12])[c:3]([H:13])[c:4](/[N:7]=[C:8](\\\\[N:9]([O:10][H:18])[H:17])[H:16])[c:5]([H:14])[c:6]1[H:15]', molecular_formula='C7H8N2O', standard_inchi='InChI=1S/C7H8N2O/c10-9-6-8-7-4-2-1-3-5-7/h1-6,10H,(H,8,9)', inchi_key='FEUZPLBUEYBLTN-UHFFFAOYSA-N', fixed_hydrogen_inchi='InChI=1/C7H8N2O/c10-9-6-8-7-4-2-1-3-5-7/h1-6,10H,(H,8,9)/f/h9H/b8-6-', fixed_hydrogen_inchi_key='FEUZPLBUEYBLTN-NAFDMULTNA-N', unique_fixed_hydrogen_inchi_keys={'FEUZPLBUEYBLTN-NAFDMULTNA-N'}), extras={'canonical_isomeric_explicit_hydrogen_mapped_smiles': '[c:1]1([H:11])[c:2]([H:12])[c:3]([H:13])[c:4](/[N:7]=[C:8](\\\\[N:9]([O:10][H:18])[H:17])[H:16])[c:5]([H:14])[c:6]1[H:15]'}, keywords={}), 'c1coc(-c2ccco2)c1': DatasetEntry(index='c1coc(-c2ccco2)c1', initial_molecules=[Molecule(name='C8H6O2', formula='C8H6O2', hash='3dbee98')], attributes=MoleculeAttributes(canonical_smiles='c1coc(-c2ccco2)c1', canonical_isomeric_smiles='c1coc(-c2ccco2)c1', canonical_explicit_hydrogen_smiles='[H][C]1=[C]([H])[C]([H])=[C]([C]2=[C]([H])[C]([H])=[C]([H])[O]2)[O]1', canonical_isomeric_explicit_hydrogen_smiles='[H][C]1=[C]([H])[C]([H])=[C]([C]2=[C]([H])[C]([H])=[C]([H])[O]2)[O]1', canonical_isomeric_explicit_hydrogen_mapped_smiles='[C:1]1([H:11])=[C:5]([H:13])[O:4][C:3]([C:6]2=[C:7]([H:14])[C:8]([H:15])=[C:9]([H:16])[O:10]2)=[C:2]1[H:12]', molecular_formula='C8H6O2', standard_inchi='InChI=1S/C8H6O2/c1-3-7(9-5-1)8-4-2-6-10-8/h1-6H', inchi_key='UDHZFLBMZZVHRA-UHFFFAOYSA-N', fixed_hydrogen_inchi='InChI=1/C8H6O2/c1-3-7(9-5-1)8-4-2-6-10-8/h1-6H', fixed_hydrogen_inchi_key='UDHZFLBMZZVHRA-UHFFFAOYNA-N', unique_fixed_hydrogen_inchi_keys={'UDHZFLBMZZVHRA-UHFFFAOYNA-N'}), extras={'canonical_isomeric_explicit_hydrogen_mapped_smiles': '[C:1]1([H:11])=[C:5]([H:13])[O:4][C:3]([C:6]2=[C:7]([H:14])[C:8]([H:15])=[C:9]([H:16])[O:10]2)=[C:2]1[H:12]'}, keywords={})}, filtered_molecules={'ElementFilter': FilterEntry(component='ElementFilter', component_settings={'type': 'ElementFilter', 'allowed_elements': [1, 6, 7, 8]}, component_provenance={'openff-toolkit': '0.15.0', 'openff-qcsubmit': '0.50.2+0.g2fa465a.dirty', 'RDKitToolkitWrapper': '2023.09.4', 'AmberToolsToolkitWrapper': '22.0', 'openff-units_elements': '0.2.1'}, molecules=['[H][C]([H])([H])[C]1=[N][C]([Cl])=[N][C]1=[N+]=[N-]']), 'MolecularWeightFilter': FilterEntry(component='MolecularWeightFilter', component_settings={'type': 'MolecularWeightFilter', 'minimum_weight': 130, 'maximum_weight': 781}, component_provenance={'openff-toolkit': '0.15.0', 'openff-qcsubmit': '0.50.2+0.g2fa465a.dirty', 'RDKitToolkitWrapper': '2023.09.4', 'AmberToolsToolkitWrapper': '22.0'}, molecules=['[H]/[N]=[C](/[N]([H])[H])[N]([H])[C]1=[N][N]=[N][N]1[H]', '[H][c]1[c]([H])[c]([H])[n+]([H])[c]([H])[c]1[H]', '[H][C]([H])([H])[N+]([O-])([C]([H])([H])[H])[C]([H])([H])[H]', '[H][N]([H])[C](=[O])[N]([H])[O][C]([H])([H])[H]', '[H][C]1=[C]([H])[N]([H])[c]2[c]([H])[c]([H])[c]([H])[c]([H])[c]21', '[H][O][C]([H])=[C]([H])[H]', '[H][C]1=[C]([H])[C]([O-])=[C]([H])[O]1', '[H][O][N]([H])[C](=[O])[C]([H])([H])[H]', '[H][C]([H])=[N+]([C]([H])([H])[H])[C]([H])([H])[H]', '[H][C](=[O])[C]([H])=[O]', '[H][C]([H])=[C]([H])[H]', '[H][c]1[c]([H])[c]([H])[n+]([O-])[c]([H])[c]1[H]', '[H][O][N]([C]([H])([H])[H])[C]([H])([H])[H]', '[H][O][N+](=[O])[O-]', '[H][C](=[O])[C]([H])([H])[H]', '[H][C]([H])([H])[C]([H])([H])[H]', '[H][C]1=[C]([H])[C]([H])([H])[C]([H])=[C]([H])[C]1=[O]']), 'StandardConformerGenerator': FilterEntry(component='StandardConformerGenerator', component_settings={'type': 'StandardConformerGenerator', 'rms_cutoff': None, 'max_conformers': 1, 'clear_existing': True}, component_provenance={'openff-toolkit': '0.15.0', 'openff-qcsubmit': '0.50.2+0.g2fa465a.dirty', 'RDKitToolkitWrapper': '2023.09.4', 'AmberToolsToolkitWrapper': '22.0'}, molecules=[])})" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -461,8 +679,14 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:31:01.496289Z", + "iopub.status.busy": "2024-01-30T16:31:01.496022Z", + "iopub.status.idle": "2024-01-30T16:31:01.499297Z", + "shell.execute_reply": "2024-01-30T16:31:01.498901Z" + }, "pycharm": { "name": "#%%\n" } @@ -474,7 +698,7 @@ "2" ] }, - "execution_count": 11, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -496,8 +720,14 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:31:01.501333Z", + "iopub.status.busy": "2024-01-30T16:31:01.501184Z", + "iopub.status.idle": "2024-01-30T16:31:01.504091Z", + "shell.execute_reply": "2024-01-30T16:31:01.503710Z" + }, "pycharm": { "name": "#%%\n" } @@ -509,7 +739,7 @@ "2" ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -531,8 +761,14 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:31:01.506175Z", + "iopub.status.busy": "2024-01-30T16:31:01.506009Z", + "iopub.status.idle": "2024-01-30T16:31:01.517084Z", + "shell.execute_reply": "2024-01-30T16:31:01.516666Z" + }, "pycharm": { "name": "#%%\n" } @@ -542,8 +778,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "c1ccc(cc1)/N=C\\NO\n", - "C1=COC(=C1)C2=CC=CO2\n" + "ON/C=N\\c1ccccc1\n", + "c1coc(-c2ccco2)c1\n" ] } ], @@ -565,8 +801,14 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:31:01.519602Z", + "iopub.status.busy": "2024-01-30T16:31:01.519379Z", + "iopub.status.idle": "2024-01-30T16:31:01.808685Z", + "shell.execute_reply": "2024-01-30T16:31:01.808255Z" + }, "pycharm": { "name": "#%%\n" } @@ -576,24 +818,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "CC1=NC(=NC1=[N+]=[N-])Cl\n", - "[H]/N=C(/N)\\NC1=NN=NN1\n", + "CC1=NC(Cl)=NC1=[N+]=[N-]\n", + "[H]/N=C(/N)Nc1nnn[nH]1\n", "c1cc[nH+]cc1\n", "C[N+](C)(C)[O-]\n", - "CONC(=O)N\n", - "c1ccc2c(c1)C=CN2\n", + "CONC(N)=O\n", + "c1ccc2[nH]ccc2c1\n", "C=CO\n", - "C1=COC=C1[O-]\n", + "[O-]c1ccoc1\n", "CC(=O)NO\n", - "C[N+](=C)C\n", - "C(=O)C=O\n", - "C=C\n", - "c1cc[n+](cc1)[O-]\n", + "C=[N+](C)C\n", + "O=CC=O\n", + "C=C\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[O-][n+]1ccccc1\n", "CN(C)O\n", - "N(=O)(=O)O\n", + "O=[N+]([O-])O\n", "CC=O\n", "CC\n", - "C1C=CC(=O)C=C1\n" + "O=C1C=CCC=C1\n" ] } ], @@ -615,8 +863,15 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, + "execution_count": 17, + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:31:01.811770Z", + "iopub.status.busy": "2024-01-30T16:31:01.811415Z", + "iopub.status.idle": "2024-01-30T16:31:01.815830Z", + "shell.execute_reply": "2024-01-30T16:31:01.815362Z" + } + }, "outputs": [], "source": [ "dataset.export_dataset(\"example-dataset.json\")" @@ -635,8 +890,15 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": {}, + "execution_count": 18, + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:31:01.818113Z", + "iopub.status.busy": "2024-01-30T16:31:01.817937Z", + "iopub.status.idle": "2024-01-30T16:31:01.821400Z", + "shell.execute_reply": "2024-01-30T16:31:01.820854Z" + } + }, "outputs": [], "source": [ "dataset.molecules_to_file(\"example-dataset.smi\", \"smi\")\n", @@ -657,8 +919,14 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": { + "execution": { + "iopub.execute_input": "2024-01-30T16:31:01.823585Z", + "iopub.status.busy": "2024-01-30T16:31:01.823414Z", + "iopub.status.idle": "2024-01-30T16:31:02.040883Z", + "shell.execute_reply": "2024-01-30T16:31:02.040424Z" + }, "pycharm": { "name": "#%%\n" } @@ -667,13 +935,6 @@ "source": [ "dataset.visualize(\"example-dataset.pdf\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -692,9 +953,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.11.7" } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 4 }