Skip to content

Commit 65dbf89

Browse files
mfbalin and BowenYao18
authored and committed
[GraphBolt][Dataset] Merging part of dmlc#7708, igb-het small datasets by Bowen Yao's (dmlc#7788)
Co-authored-by: Bowen Yao <112051015+BowenYao18@users.noreply.github.com> Co-authored-by: BowenYao18 <by18@rice.edu>
1 parent a01c0c5 commit 65dbf89

File tree

2 files changed

+28
-7
lines changed

2 files changed

+28
-7
lines changed

examples/graphbolt/pyg/hetero/node_classification.py

+12-7
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,12 @@ def create_dataloader(
5858
datapipe = datapipe.copy_to(device=device)
5959
need_copy = False
6060

61+
node_feature_keys = {"paper": ["feat"], "author": ["feat"]}
6162
if args.dataset == "ogb-lsc-mag240m":
62-
node_feature_keys = {
63-
"paper": ["feat"],
64-
"author": ["feat"],
65-
"institution": ["feat"],
66-
}
63+
node_feature_keys["institution"] = ["feat"]
64+
if "igb-het" in args.dataset:
65+
node_feature_keys["institute"] = ["feat"]
66+
node_feature_keys["fos"] = ["feat"]
6767
# Fetch node features for the sampled subgraph.
6868
datapipe = datapipe.fetch_feature(features, node_feature_keys)
6969

@@ -335,8 +335,13 @@ def parse_args():
335335
"--dataset",
336336
type=str,
337337
default="ogb-lsc-mag240m",
338-
choices=["ogb-lsc-mag240m"],
339-
help="Dataset name. Possible values: ogb-lsc-mag240m",
338+
choices=[
339+
"ogb-lsc-mag240m",
340+
"igb-het-tiny",
341+
"igb-het-small",
342+
"igb-het-medium",
343+
],
344+
help="Dataset name. Possible values: ogb-lsc-mag240m, igb-het-[tiny|small|medium].",
340345
)
341346
parser.add_argument(
342347
"--fanout",

python/dgl/graphbolt/impl/ondisk_dataset.py

+16
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,16 @@ class BuiltinDataset(OnDiskDataset):
990990
Self edges are added to the original graph.
991991
Node features are stored as float32.
992992
993+
**igb-het-[tiny|small|medium]**
994+
The igb-het-[tiny|small|medium] dataset is a heterogeneous citation network,
995+
which is designed for developers to train and evaluate GNN models with
996+
high fidelity. See more details in `igb-het-[tiny|small|medium]
997+
<https://github.com/IllinoisGraphBenchmark/IGB-Datasets>`_.
998+
999+
.. note::
1000+
Four reverse edge types are added to the original graph.
1001+
Node features are stored as float32.
1002+
9931003
Parameters
9941004
----------
9951005
name : str
@@ -1019,6 +1029,10 @@ class BuiltinDataset(OnDiskDataset):
10191029
"igb-hom-tiny-seeds",
10201030
"igb-hom-small",
10211031
"igb-hom-small-seeds",
1032+
"igb-het-tiny",
1033+
"igb-het-tiny-seeds",
1034+
"igb-het-small",
1035+
"igb-het-small-seeds",
10221036
]
10231037
_large_datasets = [
10241038
"ogb-lsc-mag240m",
@@ -1031,6 +1045,8 @@ class BuiltinDataset(OnDiskDataset):
10311045
"igb-hom-large-seeds",
10321046
"igb-hom",
10331047
"igb-hom-seeds",
1048+
"igb-het-medium",
1049+
"igb-het-medium-seeds",
10341050
]
10351051
_all_datasets = _datasets + _large_datasets
10361052

0 commit comments

Comments
 (0)