Commit 6a2f112

sw: Validate GEMM, Layernorm and FA-2 tile footprints in TCDM
1 parent 6fcea3a

3 files changed: +24 -7 lines changed

sw/blas/gemm/scripts/datagen.py

Lines changed: 12 additions & 0 deletions
@@ -51,9 +51,21 @@ def validate_config(self, gemm_fp, parallelize_m,
                         transb, M, N, K, beta, **kwargs):
         frac_m = M / m_tiles
         frac_n = N / n_tiles
+        frac_k = K / k_tiles

         dtype, impl = self.infer_implementation(gemm_fp)

+        # Calculate total TCDM occupation
+        # Note: doesn't account for double buffering
+        prec = data_utils.size_from_precision_t(dtype)
+        a_size = frac_m * frac_k * prec
+        b_size = frac_k * frac_n * prec
+        c_size = frac_m * frac_n * prec
+        total_size = a_size
+        total_size += b_size
+        total_size += c_size
+        data_utils.validate_tcdm_footprint(total_size)
+
         assert (M % m_tiles) == 0, 'M is not an integer multiple of tile size'
         assert (N % n_tiles) == 0, 'N is not an integer multiple of tile size'
         assert (K % k_tiles) == 0, 'K is not an integer multiple of tile size'
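
For reference, the check added above just sums the per-tile footprints of the A, B and C operands. The following standalone sketch restates that arithmetic outside the generator class; the precision-to-byte mapping and the name `gemm_tile_footprint` are illustrative assumptions, not repository code:

```python
# Illustrative sketch of the GEMM tile footprint summed above.
# PREC_BYTES and gemm_tile_footprint are assumptions, not repository code.
PREC_BYTES = {'FP64': 8, 'FP32': 4, 'FP16': 2, 'FP8': 1}

def gemm_tile_footprint(M, N, K, m_tiles, n_tiles, k_tiles, dtype='FP64'):
    """Bytes of TCDM occupied by one A, B and C tile (no double buffering)."""
    frac_m, frac_n, frac_k = M / m_tiles, N / n_tiles, K / k_tiles
    prec = PREC_BYTES[dtype]
    a_size = frac_m * frac_k * prec
    b_size = frac_k * frac_n * prec
    c_size = frac_m * frac_n * prec
    return a_size + b_size + c_size

# A 192x192x192 FP64 GEMM split 2x2x2 needs 3 * 96 * 96 * 8 B = 216 KiB per
# tile set, which would not fit in a 112 KiB TCDM heap.
print(gemm_tile_footprint(192, 192, 192, 2, 2, 2))
```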

sw/dnn/flashattention_2/scripts/datagen.py

Lines changed: 1 addition & 7 deletions
@@ -14,7 +14,6 @@
 import os
 import torch
 import pyflexfloat as ff
-import humanize

 sys.path.append(os.path.join(os.path.dirname(__file__), "../../../../util/sim/"))
 sys.path.append(os.path.join(os.path.dirname(__file__), "../../../blas/"))
@@ -31,8 +30,6 @@
 # the occurrence of these splits the data should be aligned to 4KB
 BURST_ALIGNMENT = 4096

-# Maximum available size in TCDM (in bytes)
-L1_HEAP_SIZE = 112 * 1024

 def torch_golden_model(Q, K, V):
     return torch.nn.functional.scaled_dot_product_attention(Q, K, V)
@@ -169,10 +166,7 @@ def validate_config(L, S, d, B_r, B_c, dtype, baseline, gemm_impl):
     total_size += o_fa_size
     total_size += m_i_size * 2  # m_i and m_i_prev
     total_size += l_i_size
-    assert total_size < L1_HEAP_SIZE, \
-        f'Total heap space required {humanize.naturalsize(total_size, binary=True)} exceeds ' \
-        f'limit of {humanize.naturalsize(L1_HEAP_SIZE, binary=True)}'
-    print(f'Total heap space required {humanize.naturalsize(total_size, binary=True)}')
+    data_utils.validate_tcdm_footprint(total_size)

     # Q*K^t
     gemm.datagen.GemmDataGen().validate_config(
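
The inline check against `L1_HEAP_SIZE` removed here is what the shared `data_utils.validate_tcdm_footprint` call now replaces across all three generators. A minimal sketch of such a helper, assuming the 112 KiB limit and the humanize-formatted message shown in the removed lines; the actual implementation in `data_utils` may differ:

```python
import humanize

# Assumed limit, taken from the L1_HEAP_SIZE constant removed above (112 KiB).
L1_HEAP_SIZE = 112 * 1024

def validate_tcdm_footprint(total_size, heap_size=L1_HEAP_SIZE):
    """Fail data generation if a tile footprint does not fit in the TCDM heap."""
    assert total_size < heap_size, \
        f'Total heap space required {humanize.naturalsize(total_size, binary=True)} ' \
        f'exceeds limit of {humanize.naturalsize(heap_size, binary=True)}'
    print(f'Total heap space required {humanize.naturalsize(total_size, binary=True)}')
```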

sw/dnn/layernorm/scripts/datagen.py

Lines changed: 11 additions & 0 deletions
@@ -49,6 +49,17 @@ def golden_model_torch(ifmap, eps, shape):


 def validate_config(**kwargs):
+    # Aliases
+    batch_size = kwargs['input_dim']['batch_size']
+    seq_len = kwargs['input_dim']['seq_len']
+    embeddings = kwargs['input_dim']['embeddings']
+
+    # Calculate total TCDM occupation
+    prec = data_utils.size_from_precision_t(kwargs['prec'])
+    tiled_seq_len = seq_len / kwargs['n_tiles']
+    total_size = batch_size * tiled_seq_len * embeddings * prec
+    data_utils.validate_tcdm_footprint(total_size)
+
     assert kwargs['input_dim']['seq_len'] % kwargs['n_tiles'] == 0, 'Input dimension is not' \
                                                                     ' an integer multiple of' \
                                                                     ' tile size'
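
For LayerNorm the footprint is just the tiled input activation: batch_size * (seq_len / n_tiles) * embeddings * prec bytes. A quick numeric sanity check with hypothetical dimensions (the 112 KiB limit is assumed from the constant removed in the FA-2 file):

```python
# Hypothetical LayerNorm config: batch 1, seq_len 512 split into 16 tiles,
# 768 embeddings, FP32 (4 bytes per element).
batch_size, seq_len, n_tiles, embeddings, prec = 1, 512, 16, 768, 4

tiled_seq_len = seq_len / n_tiles                            # 32 rows per tile
total_size = batch_size * tiled_seq_len * embeddings * prec  # 98304 B = 96 KiB
assert total_size < 112 * 1024                               # fits in a 112 KiB TCDM heap
print(total_size)
```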
