Commit 09fcea8

aporialiao authored and facebook-github-bot committed
Add unsharded module reference to sharded modules (#2901)
Summary: Adding a simple unsharded module reference to sharded modules. This will be used in Dynamic Sharding by `DistributedModelParallel` (DMP) to reshard an already-sharded module. Because DMP was built with only a one-way relationship in mind, access to the unsharded module type helps determine which sharder to use when resharding.

Differential Revision: D73407830
1 parent 9eaec09 commit 09fcea8

10 files changed: +43 −1
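For illustration, the sketch below (not part of this commit) shows how a resharding path could consume the new `unsharded_module_type` property to pick the sharder matching an already-sharded module. The helper `select_sharder_for_resharding` and its `sharder_map` argument are hypothetical names used only for this example, under the assumption that sharders are keyed by the unsharded module type.

# Hypothetical helper, not a TorchRec API: resolve the sharder for an
# already-sharded module by asking it for its unsharded module type.
from typing import Dict, Type

import torch.nn as nn

from torchrec.distributed.types import ModuleSharder, ShardedModule


def select_sharder_for_resharding(
    sharded_module: ShardedModule,
    sharder_map: Dict[Type[nn.Module], ModuleSharder[nn.Module]],
) -> ModuleSharder[nn.Module]:
    # The sharded module now reports the type it was created from, so the
    # caller no longer needs to keep a reference to the original module.
    unsharded_type = sharded_module.unsharded_module_type
    if unsharded_type not in sharder_map:
        raise KeyError(f"No sharder registered for {unsharded_type.__name__}")
    return sharder_map[unsharded_type]

For example, a sharded EmbeddingBagCollection would report EmbeddingBagCollection as its unsharded type, which keys the lookup above.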

torchrec/distributed/embedding.py (+4)

@@ -1409,6 +1409,10 @@ def _embedding_dim_for_sharding_type(self, sharding_type: str) -> int:
     def fused_optimizer(self) -> KeyedOptimizer:
         return self._optim

+    @property
+    def unsharded_module_type(self) -> Type[EmbeddingCollection]:
+        return EmbeddingCollection
+
     def create_context(self) -> EmbeddingCollectionContext:
         return EmbeddingCollectionContext(sharding_contexts=[])

torchrec/distributed/embeddingbag.py (+4)

@@ -1598,6 +1598,10 @@ def create_context(self) -> EmbeddingBagCollectionContext:
     def extend_shard_name(shard_name: str) -> str:
         return f"embedding_bags.{shard_name}.weight"

+    @property
+    def unsharded_module_type(self) -> Type[EmbeddingBagCollection]:
+        return EmbeddingBagCollection
+


 class EmbeddingBagCollectionSharder(BaseEmbeddingSharder[EmbeddingBagCollection]):
     """

torchrec/distributed/fp_embeddingbag.py (+4)

@@ -161,6 +161,10 @@ def sharded_parameter_names(self, prefix: str = "") -> Iterator[str]:
             if "_embedding_bag_collection" in fqn:
                 yield append_prefix(prefix, fqn)

+    @property
+    def unsharded_module_type(self) -> Type[FeatureProcessedEmbeddingBagCollection]:
+        return FeatureProcessedEmbeddingBagCollection
+


 class FeatureProcessedEmbeddingBagCollectionSharder(
     BaseEmbeddingSharder[FeatureProcessedEmbeddingBagCollection]

torchrec/distributed/fused_embeddingbag.py (+4)

@@ -85,6 +85,10 @@ def __init__(
         # We need to ensure that a checkpoint from DDP and a checkpoint from a
         # model parallel version are compatible.

+    @property
+    def unsharded_module_type(self) -> Type[FusedEmbeddingBagCollection]:
+        return FusedEmbeddingBagCollection
+


 class FusedEmbeddingBagCollectionSharder(
     BaseEmbeddingSharder[FusedEmbeddingBagCollection]

torchrec/distributed/mc_embedding.py (+4)

@@ -97,6 +97,10 @@ def create_context(
     ) -> ManagedCollisionEmbeddingCollectionContext:
         return ManagedCollisionEmbeddingCollectionContext(sharding_contexts=[])

+    @property
+    def unsharded_module_type(self) -> Type[ManagedCollisionEmbeddingCollection]:
+        return ManagedCollisionEmbeddingCollection
+


 class ManagedCollisionEmbeddingCollectionSharder(
     BaseManagedCollisionEmbeddingCollectionSharder[ManagedCollisionEmbeddingCollection]

torchrec/distributed/mc_embeddingbag.py (+4)

@@ -82,6 +82,10 @@ def create_context(
     ) -> ManagedCollisionEmbeddingBagCollectionContext:
         return ManagedCollisionEmbeddingBagCollectionContext(sharding_contexts=[])

+    @property
+    def unsharded_module_type(self) -> Type[ManagedCollisionEmbeddingBagCollection]:
+        return ManagedCollisionEmbeddingBagCollection
+


 class ManagedCollisionEmbeddingBagCollectionSharder(
     BaseManagedCollisionEmbeddingCollectionSharder[

torchrec/distributed/quant_embedding.py (+4)

@@ -1320,6 +1320,10 @@ def sharded_parameter_names(self, prefix: str = "") -> Iterator[str]:
         for fqn, _ in self.named_buffers():
             yield append_prefix(prefix, fqn)

+    @property
+    def unsharded_module_type(self) -> Type[QuantManagedCollisionEmbeddingCollection]:
+        return QuantManagedCollisionEmbeddingCollection
+


 class QuantManagedCollisionEmbeddingCollectionSharder(
     BaseQuantEmbeddingSharder[QuantManagedCollisionEmbeddingCollection]

torchrec/distributed/quant_embeddingbag.py (+4)

@@ -383,6 +383,10 @@ def create_context(self) -> NullShardedModuleContext:

         return NullShardedModuleContext()

+    @property
+    def unsharded_module_type(self) -> Type[QuantEmbeddingBagCollection]:
+        return QuantEmbeddingBagCollection
+


 class QuantEmbeddingBagCollectionSharder(
     BaseQuantEmbeddingSharder[QuantEmbeddingBagCollection]

torchrec/distributed/tests/test_embedding_types.py (+7 −1)

@@ -8,9 +8,10 @@
 # pyre-strict

 import unittest
-from typing import Dict, List
+from typing import Dict, List, Type

 import torch
+from torch import nn
 from torchrec.distributed.embedding_types import KJTList, ShardedEmbeddingModule
 from torchrec.distributed.embeddingbag import EmbeddingBagCollectionContext
 from torchrec.distributed.types import Awaitable, LazyAwaitable
@@ -55,6 +56,11 @@ def compute(self, ctx: ShrdCtx, dist_input: CompIn) -> DistOut:
     def output_dist(self, ctx: ShrdCtx, output: DistOut) -> LazyAwaitable[Out]:
         pass

+    @property
+    def unsharded_module_type(self) -> Type[nn.Module]:
+        # Since this is a fake sharded embedding module, just returning default module
+        return nn.Module
+


 class TestShardedEmbeddingModule(unittest.TestCase):
     def test_train_mode(self) -> None:

torchrec/distributed/types.py (+4)

@@ -1034,6 +1034,10 @@ def sharded_parameter_names(self, prefix: str = "") -> Iterator[str]:
         for key, _ in self.named_parameters(prefix):
             yield key

+    @property
+    @abc.abstractmethod
+    def unsharded_module_type(self) -> Type[nn.Module]: ...
+


 def get_tensor_size_bytes(t: torch.Tensor) -> int:
     b: int = t.numel() * t.element_size()
