You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[rank0]: Traceback (most recent call last):
[rank0]: File "/generate.py", line 411, in <module>
[rank0]: generate(args)
[rank0]: File "/generate.py", line 369, in generate
[rank0]: video = wan_i2v.generate(
[rank0]: ^^^^^^^^^^^^^^^^^
[rank0]: File "/wan/image2video.py", line 337, in generate
[rank0]: videos = self.vae.decode(x0)
[rank0]: ^^^^^^^^^^^^^^^^^^^
[rank0]: File "/wan/modules/vae.py", line 659, in decode
[rank0]: return [
[rank0]: ^
[rank0]: File "/wan/modules/vae.py", line 660, in <listcomp>
[rank0]: self.model.decode(u.unsqueeze(0),
[rank0]: File "/wan/modules/vae.py", line 562, in decode
[rank0]: out_ = self.decoder(
[rank0]: ^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/wan/modules/vae.py", line 451, in forward
[rank0]: x = layer(x, feat_cache, feat_idx)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/wan/modules/vae.py", line 215, in forward
[rank0]: x = layer(x, feat_cache[idx])
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/wan/modules/vae.py", line 36, in forward
[rank0]: return super().forward(x)
[rank0]: ^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/conv.py", line 725, in forward
[rank0]: return self._conv_forward(input, self.weight, self.bias)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/conv.py", line 720, in _conv_forward
[rank0]: return (
[rank0]: ^^^^^^^^^
[rank0]: RuntimeError: CUDA driver error: invalid argument
[rank0]:[W321 22:55:33.464215343 ProcessGroupNCCL.cpp:1250] Warning: WARNING: process group has NOT been destroyed before we destruct ProcessGroupNCCL. On normal program exit, the application should call destroy_process_group to ensure that any pending NCCL operations have finished in this process. In rare cases this process can exit before this point and block the progress of another member of the process group. This constraint has always been present, but this warning has only been added since PyTorch 2.4 (function operator())
请问该如何诊断或者修复呢?
The text was updated successfully, but these errors were encountered:
8卡A100 40G
[rank0]: Traceback (most recent call last):
[rank0]: File "/generate.py", line 411, in <module>
[rank0]: generate(args)
[rank0]: File "/generate.py", line 369, in generate
[rank0]: video = wan_i2v.generate(
[rank0]: ^^^^^^^^^^^^^^^^^
[rank0]: File "/wan/image2video.py", line 337, in generate
[rank0]: videos = self.vae.decode(x0)
[rank0]: ^^^^^^^^^^^^^^^^^^^
[rank0]: File "/wan/modules/vae.py", line 659, in decode
[rank0]: return [
[rank0]: ^
[rank0]: File "/wan/modules/vae.py", line 660, in <listcomp>
[rank0]: self.model.decode(u.unsqueeze(0),
[rank0]: File "/wan/modules/vae.py", line 562, in decode
[rank0]: out_ = self.decoder(
[rank0]: ^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/wan/modules/vae.py", line 451, in forward
[rank0]: x = layer(x, feat_cache, feat_idx)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/wan/modules/vae.py", line 215, in forward
[rank0]: x = layer(x, feat_cache[idx])
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
[rank0]: return self._call_impl(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
[rank0]: return forward_call(*args, **kwargs)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/wan/modules/vae.py", line 36, in forward
[rank0]: return super().forward(x)
[rank0]: ^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/conv.py", line 725, in forward
[rank0]: return self._conv_forward(input, self.weight, self.bias)
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: File "/root/openr1/lib/python3.11/site-packages/torch/nn/modules/conv.py", line 720, in _conv_forward
[rank0]: return (
[rank0]: ^^^^^^^^^
[rank0]: RuntimeError: CUDA driver error: invalid argument
[rank0]:[W321 22:55:33.464215343 ProcessGroupNCCL.cpp:1250] Warning: WARNING: process group has NOT been destroyed before we destruct ProcessGroupNCCL. On normal program exit, the application should call destroy_process_group to ensure that any pending NCCL operations have finished in this process. In rare cases this process can exit before this point and block the progress of another member of the process group. This constraint has always been present, but this warning has only been added since PyTorch 2.4 (function operator())
请问该如何诊断或者修复呢?
The text was updated successfully, but these errors were encountered: