-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathMatProjectEFormDataset.py
30 lines (23 loc) · 1.36 KB
/
MatProjectEFormDataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from kgcnn.data.datasets.MatBenchDataset2020 import MatBenchDataset2020
class MatProjectEFormDataset(MatBenchDataset2020):
    r"""Dataset wrapper for the `MatBench <https://matbench.materialsproject.org/>`__ task
    'matbench_mp_e_form' (stored and processed as :obj:`MatProjectEFormDataset`).

    Matbench test dataset for predicting the DFT formation energy from a structure. The data is
    adapted from the Materials Project database; entries with a formation energy of more than 2.5eV
    and entries containing noble gases were removed. Retrieved April 2, 2019.

    For benchmarking with nested cross validation the order of the dataset must be identical to the
    retrieved data; see the Automatminer/Matbench publication for more details.

    * Number of samples: 132752.
    * Task type: regression.
    * Input type: structure.
    """

    def __init__(self, reload=False, verbose: int = 10):
        r"""Set up the 'matbench_mp_e_form' dataset.

        Args:
            reload (bool): Whether to reload the data and make new dataset. Default is False.
            verbose (int): Print progress or info for processing where 60=silent. Default is 10.
        """
        # Identifier of this dataset within Matbench; forwarded to the base class initializer.
        dataset_name = "matbench_mp_e_form"
        super().__init__(dataset_name, reload=reload, verbose=verbose)

        # Label metadata is assigned after the base class initializer has run.
        self.label_names = "e_form"
        self.label_units = "eV/atom"