Use only static_chunk_size for chunk masks in flow training (drop the dynamic-chunk arguments)

aluminumbox · aluminumbox · commit d9ffd592f67d · 2025-04-07T22:34:45.000+08:00
diff --git a/cosyvoice/transformer/upsample_encoder.py b/cosyvoice/transformer/upsample_encoder.py
@@ -286,12 +286,7 @@ def forward(
             xs = self.global_cmvn(xs)
         xs, pos_emb, masks = self.embed(xs, masks)
         mask_pad = masks  # (B, 1, T/subsample_rate)
-        chunk_masks = add_optional_chunk_mask(xs, masks,
-                                              self.use_dynamic_chunk if streaming is True else False,
-                                              self.use_dynamic_left_chunk if streaming is True else False,
-                                              decoding_chunk_size if streaming is True else 0,
-                                              self.static_chunk_size if streaming is True else 0,
-                                              num_decoding_left_chunks if streaming is True else -1)
+        chunk_masks = add_optional_chunk_mask(xs, masks, False, False, 0, self.static_chunk_size if streaming is True else 0, -1)
         # lookahead + conformer encoder
         xs, _ = self.pre_lookahead_layer(xs)
         xs = self.forward_layers(xs, chunk_masks, pos_emb, mask_pad)
@@ -304,12 +299,7 @@ def forward(
         masks = ~make_pad_mask(xs_lens, T).unsqueeze(1)  # (B, 1, T)
         xs, pos_emb, masks = self.up_embed(xs, masks)
         mask_pad = masks  # (B, 1, T/subsample_rate)
-        chunk_masks = add_optional_chunk_mask(xs, masks,
-                                              self.use_dynamic_chunk if streaming is True else False,
-                                              self.use_dynamic_left_chunk if streaming is True else False,
-                                              decoding_chunk_size if streaming is True else 0,
-                                              self.static_chunk_size * self.up_layer.stride if streaming is True else 0,
-                                              num_decoding_left_chunks if streaming is True else -1)
+        chunk_masks = add_optional_chunk_mask(xs, masks, False, False, 0, self.static_chunk_size * self.up_layer.stride if streaming is True else 0, -1)
         xs = self.forward_up_layers(xs, chunk_masks, pos_emb, mask_pad)
 
         if self.normalize_before: