Update documentation of to_pyg()

Iñigo Gabirondo · Iñigo Gabirondo · commit 1874e783867b · 2024-05-16T11:03:01.000+02:00
diff --git a/programl/transform_ops.py b/programl/transform_ops.py
@@ -265,7 +265,7 @@ def _run_one(graph: ProgramGraph) -> str:
 def to_pyg(
     graphs: Union[ProgramGraph, Iterable[ProgramGraph]],
     timeout: int = 300,
-    vocabulary: Dict[str, int] = None,
+    vocabulary: Optional[Dict[str, int]] = None,
     executor: Optional[ExecutorLike] = None,
     chunksize: Optional[int] = None,
 ) -> Union[HeteroData, Iterable[HeteroData]]:
@@ -281,6 +281,10 @@ def to_pyg(
         graph conversion before raising an error. If multiple inputs are
         provided, this timeout is per-input.
 
+    :param vocabulary: A dictionary containing ProGraML's vocabulary, where the
+        keys are the text attributes of the nodes and the values are their
+        respective indices.
+
     :param executor: An executor object, with method :code:`submit(callable,
         *args, **kwargs)` and returning a Future-like object with methods
         :code:`done() -> bool` and :code:`result() -> float`. The executor role
@@ -299,16 +303,17 @@ def to_pyg(
     """
 
     def _run_one(graph: ProgramGraph) -> HeteroData:
-        # 3 lists, one per edge type
-        # (control, data and call edges)
+        # 4 lists, one per edge type
+        # (control, data, call and type edges)
         adjacencies = [[], [], [], []]
         edge_positions = [[], [], [], []]
 
-        # Create the adjacency lists
+        # Create the adjacency lists and the matching edge position lists
         for edge in graph.edge:
             adjacencies[edge.flow].append([edge.source, edge.target])
             edge_positions[edge.flow].append(edge.position)
 
+        # Store the text attributes
         node_text = [node.text for node in graph.node]
 
         vocab_ids = None