diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 43815f2ef0..88cc8e7949 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -45,8 +45,11 @@ jobs:
           sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
       - name: Install TTS
         run: |
-          python3 -m uv pip install --system "coqui-tts[dev,server,languages] @ ."
-          python3 setup.py egg_info
+          resolution=highest
+          if [ "${{ matrix.python-version }}" == "3.9" ]; then
+            resolution=lowest-direct
+          fi
+          python3 -m uv pip install --resolution=$resolution --system "coqui-tts[dev,server,languages] @ ."
       - name: Unit tests
         run: make ${{ matrix.subset }}
       - name: Upload coverage data
diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py
index be275baa9c..127199186b 100644
--- a/TTS/bin/compute_attention_masks.py
+++ b/TTS/bin/compute_attention_masks.py
@@ -8,6 +8,7 @@
 import torch
 from torch.utils.data import DataLoader
 from tqdm import tqdm
+from trainer.io import load_checkpoint

 from TTS.config import load_config
 from TTS.tts.datasets.TTSDataset import TTSDataset
@@ -15,7 +16,6 @@
 from TTS.tts.utils.text.characters import make_symbols, phonemes, symbols
 from TTS.utils.audio import AudioProcessor
 from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
-from TTS.utils.io import load_checkpoint

 if __name__ == "__main__":
     setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
diff --git a/TTS/encoder/models/base_encoder.py b/TTS/encoder/models/base_encoder.py
index 374062463d..f7137c2186 100644
--- a/TTS/encoder/models/base_encoder.py
+++ b/TTS/encoder/models/base_encoder.py
@@ -5,10 +5,10 @@
 import torchaudio
 from coqpit import Coqpit
 from torch import nn
+from trainer.io import load_fsspec

 from TTS.encoder.losses import AngleProtoLoss, GE2ELoss, SoftmaxAngleProtoLoss
 from TTS.utils.generic_utils import set_init_dict
-from TTS.utils.io import load_fsspec

 logger = logging.getLogger(__name__)

diff --git a/TTS/encoder/utils/training.py b/TTS/encoder/utils/training.py
index 7692478d6b..cc3a78b084 100644
--- a/TTS/encoder/utils/training.py
+++ b/TTS/encoder/utils/training.py
@@ -3,14 +3,13 @@

 from coqpit import Coqpit
 from trainer import TrainerArgs, get_last_checkpoint
-from trainer.generic_utils import get_experiment_folder_path
+from trainer.generic_utils import get_experiment_folder_path, get_git_branch
 from trainer.io import copy_model_files
 from trainer.logging import logger_factory
 from trainer.logging.console_logger import ConsoleLogger

 from TTS.config import load_config, register_config
 from TTS.tts.utils.text.characters import parse_symbols
-from TTS.utils.generic_utils import get_git_branch


 @dataclass
@@ -30,7 +29,7 @@ def process_args(args, config=None):
         args (argparse.Namespace or dict like): Parsed input arguments.
         config (Coqpit): Model config. If none, it is generated from `args`. Defaults to None.
     Returns:
-        c (TTS.utils.io.AttrDict): Config paramaters.
+        c (Coqpit): Config paramaters.
         out_path (str): Path to save models and logging.
         audio_path (str): Path to save generated test audios.
         c_logger (TTS.utils.console_logger.ConsoleLogger): Class that does
diff --git a/TTS/model.py b/TTS/model.py
index 01dd515d81..c3707c85ae 100644
--- a/TTS/model.py
+++ b/TTS/model.py
@@ -60,6 +60,7 @@ def load_checkpoint(
             checkpoint_path (str | os.PathLike): Path to the model checkpoint file.
             eval (bool, optional): If true, init model for inference else for training. Defaults to False.
             strict (bool, optional): Match all checkpoint keys to model's keys. Defaults to True.
-            cache (bool, optional): If True, cache the file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to False.
+            cache (bool, optional): If True, cache the file locally for subsequent calls.
+                It is cached under `trainer.io.get_user_data_dir()/tts_cache`. Defaults to False.
         """
         ...
diff --git a/TTS/tts/configs/bark_config.py b/TTS/tts/configs/bark_config.py
index 4d1cd1374a..3b893558aa 100644
--- a/TTS/tts/configs/bark_config.py
+++ b/TTS/tts/configs/bark_config.py
@@ -2,11 +2,12 @@
 from dataclasses import dataclass, field
 from typing import Dict

+from trainer.io import get_user_data_dir
+
 from TTS.tts.configs.shared_configs import BaseTTSConfig
 from TTS.tts.layers.bark.model import GPTConfig
 from TTS.tts.layers.bark.model_fine import FineGPTConfig
 from TTS.tts.models.bark import BarkAudioConfig
-from TTS.utils.generic_utils import get_user_data_dir


 @dataclass
diff --git a/TTS/tts/layers/xtts/hifigan_decoder.py b/TTS/tts/layers/xtts/hifigan_decoder.py
index 9160529bf9..b6032e5584 100644
--- a/TTS/tts/layers/xtts/hifigan_decoder.py
+++ b/TTS/tts/layers/xtts/hifigan_decoder.py
@@ -7,8 +7,8 @@
 from torch.nn import functional as F
 from torch.nn.utils.parametrizations import weight_norm
 from torch.nn.utils.parametrize import remove_parametrizations
+from trainer.io import load_fsspec

-from TTS.utils.io import load_fsspec
 from TTS.vocoder.models.hifigan_generator import get_padding

 logger = logging.getLogger(__name__)
diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py
index 0f161324f8..04d123778b 100644
--- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py
+++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py
@@ -7,6 +7,7 @@
 import torchaudio
 from coqpit import Coqpit
 from torch.utils.data import DataLoader
+from trainer.io import load_fsspec
 from trainer.torch import DistributedSampler
 from trainer.trainer_utils import get_optimizer, get_scheduler

@@ -18,7 +19,6 @@
 from TTS.tts.layers.xtts.trainer.dataset import XTTSDataset
 from TTS.tts.models.base_tts import BaseTTS
 from TTS.tts.models.xtts import Xtts, XttsArgs, XttsAudioConfig
-from TTS.utils.io import load_fsspec

 logger = logging.getLogger(__name__)

diff --git a/TTS/tts/models/align_tts.py b/TTS/tts/models/align_tts.py
index 18b9cde385..2d27a57850 100644
--- a/TTS/tts/models/align_tts.py
+++ b/TTS/tts/models/align_tts.py
@@ -4,6 +4,7 @@
 import torch
 from coqpit import Coqpit
 from torch import nn
+from trainer.io import load_fsspec

 from TTS.tts.layers.align_tts.mdn import MDNBlock
 from TTS.tts.layers.feed_forward.decoder import Decoder
@@ -15,7 +16,6 @@
 from TTS.tts.utils.speakers import SpeakerManager
 from TTS.tts.utils.text.tokenizer import TTSTokenizer
 from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
-from TTS.utils.io import load_fsspec


 @dataclass
diff --git a/TTS/tts/models/base_tacotron.py b/TTS/tts/models/base_tacotron.py
index 33e1c11ab7..79cdf1a7d4 100644
--- a/TTS/tts/models/base_tacotron.py
+++ b/TTS/tts/models/base_tacotron.py
@@ -6,6 +6,7 @@
 import torch
 from coqpit import Coqpit
 from torch import nn
+from trainer.io import load_fsspec

 from TTS.tts.layers.losses import TacotronLoss
 from TTS.tts.models.base_tts import BaseTTS
@@ -15,7 +16,6 @@
 from TTS.tts.utils.text.tokenizer import TTSTokenizer
 from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
 from TTS.utils.generic_utils import format_aux_input
-from TTS.utils.io import load_fsspec
 from TTS.utils.training import gradual_training_scheduler

 logger = logging.getLogger(__name__)
@@ -103,7 +103,8 @@ def load_checkpoint(
             config (Coqpi): model configuration.
             checkpoint_path (str): path to checkpoint file.
             eval (bool, optional): whether to load model for evaluation.
-            cache (bool, optional): If True, cache the file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to False.
+            cache (bool, optional): If True, cache the file locally for subsequent calls.
+                It is cached under `trainer.io.get_user_data_dir()/tts_cache`. Defaults to False.
         """
         state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
         self.load_state_dict(state["model"])
diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py
index 4230fcc33d..a938a3a4ab 100644
--- a/TTS/tts/models/delightful_tts.py
+++ b/TTS/tts/models/delightful_tts.py
@@ -16,6 +16,7 @@
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
 from torch.utils.data.sampler import WeightedRandomSampler
+from trainer.io import load_fsspec
 from trainer.torch import DistributedSampler, DistributedSamplerWrapper
 from trainer.trainer_utils import get_optimizer, get_scheduler

@@ -32,7 +33,6 @@
 from TTS.utils.audio.numpy_transforms import db_to_amp as db_to_amp_numpy
 from TTS.utils.audio.numpy_transforms import mel_to_wav as mel_to_wav_numpy
 from TTS.utils.audio.processor import AudioProcessor
-from TTS.utils.io import load_fsspec
 from TTS.vocoder.layers.losses import MultiScaleSTFTLoss
 from TTS.vocoder.models.hifigan_generator import HifiganGenerator
 from TTS.vocoder.utils.generic_utils import plot_results
diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py
index b108a554d5..4b74462dd5 100644
--- a/TTS/tts/models/forward_tts.py
+++ b/TTS/tts/models/forward_tts.py
@@ -6,6 +6,7 @@
 from coqpit import Coqpit
 from torch import nn
 from torch.cuda.amp.autocast_mode import autocast
+from trainer.io import load_fsspec

 from TTS.tts.layers.feed_forward.decoder import Decoder
 from TTS.tts.layers.feed_forward.encoder import Encoder
@@ -17,7 +18,6 @@
 from TTS.tts.utils.speakers import SpeakerManager
 from TTS.tts.utils.text.tokenizer import TTSTokenizer
 from TTS.tts.utils.visual import plot_alignment, plot_avg_energy, plot_avg_pitch, plot_spectrogram
-from TTS.utils.io import load_fsspec

 logger = logging.getLogger(__name__)

diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py
index a4ae012166..64954d283c 100644
--- a/TTS/tts/models/glow_tts.py
+++ b/TTS/tts/models/glow_tts.py
@@ -7,6 +7,7 @@
 from torch import nn
 from torch.cuda.amp.autocast_mode import autocast
 from torch.nn import functional as F
+from trainer.io import load_fsspec

 from TTS.tts.configs.glow_tts_config import GlowTTSConfig
 from TTS.tts.layers.glow_tts.decoder import Decoder
@@ -17,7 +18,6 @@
 from TTS.tts.utils.synthesis import synthesis
 from TTS.tts.utils.text.tokenizer import TTSTokenizer
 from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
-from TTS.utils.io import load_fsspec

 logger = logging.getLogger(__name__)

diff --git a/TTS/tts/models/neuralhmm_tts.py b/TTS/tts/models/neuralhmm_tts.py
index d5bd9d1311..277369e644 100644
--- a/TTS/tts/models/neuralhmm_tts.py
+++ b/TTS/tts/models/neuralhmm_tts.py
@@ -5,6 +5,7 @@
 import torch
 from coqpit import Coqpit
 from torch import nn
+from trainer.io import load_fsspec
 from trainer.logging.tensorboard_logger import TensorboardLogger

 from TTS.tts.layers.overflow.common_layers import Encoder, OverflowUtils
@@ -18,7 +19,6 @@
 from TTS.tts.utils.text.tokenizer import TTSTokenizer
 from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
 from TTS.utils.generic_utils import format_aux_input
-from TTS.utils.io import load_fsspec

 logger = logging.getLogger(__name__)

diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py
index 0218d0452b..b05b75009b 100644
--- a/TTS/tts/models/overflow.py
+++ b/TTS/tts/models/overflow.py
@@ -5,6 +5,7 @@
 import torch
 from coqpit import Coqpit
 from torch import nn
+from trainer.io import load_fsspec
 from trainer.logging.tensorboard_logger import TensorboardLogger

 from TTS.tts.layers.overflow.common_layers import Encoder, OverflowUtils
@@ -19,7 +20,6 @@
 from TTS.tts.utils.text.tokenizer import TTSTokenizer
 from TTS.tts.utils.visual import plot_alignment, plot_spectrogram
 from TTS.utils.generic_utils import format_aux_input
-from TTS.utils.io import load_fsspec

 logger = logging.getLogger(__name__)

diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py
index 2552133753..b014e4fdde 100644
--- a/TTS/tts/models/vits.py
+++ b/TTS/tts/models/vits.py
@@ -16,6 +16,7 @@
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
 from torch.utils.data.sampler import WeightedRandomSampler
+from trainer.io import load_fsspec
 from trainer.torch import DistributedSampler, DistributedSamplerWrapper
 from trainer.trainer_utils import get_optimizer, get_scheduler

@@ -34,7 +35,6 @@
 from TTS.tts.utils.text.characters import BaseCharacters, BaseVocabulary, _characters, _pad, _phonemes, _punctuations
 from TTS.tts.utils.text.tokenizer import TTSTokenizer
 from TTS.tts.utils.visual import plot_alignment
-from TTS.utils.io import load_fsspec
 from TTS.utils.samplers import BucketBatchSampler
 from TTS.vocoder.models.hifigan_generator import HifiganGenerator
 from TTS.vocoder.utils.generic_utils import plot_results
diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py
index e6d245a041..fa320aacd0 100644
--- a/TTS/tts/models/xtts.py
+++ b/TTS/tts/models/xtts.py
@@ -7,6 +7,7 @@
 import torch.nn.functional as F
 import torchaudio
 from coqpit import Coqpit
+from trainer.io import load_fsspec

 from TTS.tts.layers.xtts.gpt import GPT
 from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder
@@ -14,7 +15,6 @@
 from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer, split_sentence
 from TTS.tts.layers.xtts.xtts_manager import LanguageManager, SpeakerManager
 from TTS.tts.models.base_tts import BaseTTS
-from TTS.utils.io import load_fsspec

 logger = logging.getLogger(__name__)

diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py
index 48c090715f..91f8844262 100644
--- a/TTS/utils/generic_utils.py
+++ b/TTS/utils/generic_utils.py
@@ -2,29 +2,13 @@
 import datetime
 import importlib
 import logging
-import os
 import re
-import subprocess
-import sys
 from pathlib import Path
 from typing import Dict, Optional

 logger = logging.getLogger(__name__)


-# TODO: This method is duplicated in Trainer but out of date there
-def get_git_branch():
-    try:
-        out = subprocess.check_output(["git", "branch"]).decode("utf8")
-        current = next(line for line in out.split("\n") if line.startswith("*"))
-        current.replace("* ", "")
-    except subprocess.CalledProcessError:
-        current = "inside_docker"
-    except (FileNotFoundError, StopIteration) as e:
-        current = "unknown"
-    return current
-
-
 def to_camel(text):
     text = text.capitalize()
     text = re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), text)
@@ -67,28 +51,6 @@ def get_import_path(obj: object) -> str:
     return ".".join([type(obj).__module__, type(obj).__name__])


-def get_user_data_dir(appname):
-    TTS_HOME = os.environ.get("TTS_HOME")
-    XDG_DATA_HOME = os.environ.get("XDG_DATA_HOME")
-    if TTS_HOME is not None:
-        ans = Path(TTS_HOME).expanduser().resolve(strict=False)
-    elif XDG_DATA_HOME is not None:
-        ans = Path(XDG_DATA_HOME).expanduser().resolve(strict=False)
-    elif sys.platform == "win32":
-        import winreg  # pylint: disable=import-outside-toplevel
-
-        key = winreg.OpenKey(
-            winreg.HKEY_CURRENT_USER, r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
-        )
-        dir_, _ = winreg.QueryValueEx(key, "Local AppData")
-        ans = Path(dir_).resolve(strict=False)
-    elif sys.platform == "darwin":
-        ans = Path("~/Library/Application Support/").expanduser()
-    else:
-        ans = Path.home().joinpath(".local/share")
-    return ans.joinpath(appname)
-
-
 def set_init_dict(model_dict, checkpoint_state, c):
     # Partial initialization: if there is a mismatch with new and old layer, it is skipped.
     for k, v in checkpoint_state.items():
diff --git a/TTS/utils/io.py b/TTS/utils/io.py
deleted file mode 100644
index 3107ba661b..0000000000
--- a/TTS/utils/io.py
+++ /dev/null
@@ -1,70 +0,0 @@
-import os
-import pickle as pickle_tts
-from typing import Any, Callable, Dict, Union
-
-import fsspec
-import torch
-
-from TTS.utils.generic_utils import get_user_data_dir
-
-
-class RenamingUnpickler(pickle_tts.Unpickler):
-    """Overload default pickler to solve module renaming problem"""
-
-    def find_class(self, module, name):
-        return super().find_class(module.replace("mozilla_voice_tts", "TTS"), name)
-
-
-class AttrDict(dict):
-    """A custom dict which converts dict keys
-    to class attributes"""
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.__dict__ = self
-
-
-def load_fsspec(
-    path: str,
-    map_location: Union[str, Callable, torch.device, Dict[Union[str, torch.device], Union[str, torch.device]]] = None,
-    cache: bool = True,
-    **kwargs,
-) -> Any:
-    """Like torch.load but can load from other locations (e.g. s3:// , gs://).
-
-    Args:
-        path: Any path or url supported by fsspec.
-        map_location: torch.device or str.
-        cache: If True, cache a remote file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to True.
-        **kwargs: Keyword arguments forwarded to torch.load.
-
-    Returns:
-        Object stored in path.
-    """
-    is_local = os.path.isdir(path) or os.path.isfile(path)
-    if cache and not is_local:
-        with fsspec.open(
-            f"filecache::{path}",
-            filecache={"cache_storage": str(get_user_data_dir("tts_cache"))},
-            mode="rb",
-        ) as f:
-            return torch.load(f, map_location=map_location, **kwargs)
-    else:
-        with fsspec.open(path, "rb") as f:
-            return torch.load(f, map_location=map_location, **kwargs)
-
-
-def load_checkpoint(
-    model, checkpoint_path, use_cuda=False, eval=False, cache=False
-):  # pylint: disable=redefined-builtin
-    try:
-        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache)
-    except ModuleNotFoundError:
-        pickle_tts.Unpickler = RenamingUnpickler
-        state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), pickle_module=pickle_tts, cache=cache)
-    model.load_state_dict(state["model"])
-    if use_cuda:
-        model.cuda()
-    if eval:
-        model.eval()
-    return model, state
diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py
index d4781d54e6..fb5071d9b0 100644
--- a/TTS/utils/manage.py
+++ b/TTS/utils/manage.py
@@ -11,9 +11,9 @@
 import fsspec
 import requests
 from tqdm import tqdm
+from trainer.io import get_user_data_dir

 from TTS.config import load_config, read_json_with_comments
-from TTS.utils.generic_utils import get_user_data_dir

 logger = logging.getLogger(__name__)

diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py
index 7746572f23..e5cfdc1e61 100644
--- a/TTS/vc/models/freevc.py
+++ b/TTS/vc/models/freevc.py
@@ -11,12 +11,12 @@
 from torch.nn.utils import spectral_norm
 from torch.nn.utils.parametrizations import weight_norm
 from torch.nn.utils.parametrize import remove_parametrizations
+from trainer.io import load_fsspec

 import TTS.vc.modules.freevc.commons as commons
 import TTS.vc.modules.freevc.modules as modules
 from TTS.tts.utils.helpers import sequence_mask
 from TTS.tts.utils.speakers import SpeakerManager
-from TTS.utils.io import load_fsspec
 from TTS.vc.configs.freevc_config import FreeVCConfig
 from TTS.vc.models.base_vc import BaseVC
 from TTS.vc.modules.freevc.commons import init_weights
diff --git a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py
index 2636400b90..294bf322cb 100644
--- a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py
+++ b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py
@@ -5,8 +5,8 @@
 import numpy as np
 import torch
 from torch import nn
+from trainer.io import load_fsspec

-from TTS.utils.io import load_fsspec
 from TTS.vc.modules.freevc.speaker_encoder import audio
 from TTS.vc.modules.freevc.speaker_encoder.hparams import (
     mel_n_channels,
diff --git a/TTS/vc/modules/freevc/wavlm/__init__.py b/TTS/vc/modules/freevc/wavlm/__init__.py
index 0033d22c48..03b2f5827b 100644
--- a/TTS/vc/modules/freevc/wavlm/__init__.py
+++ b/TTS/vc/modules/freevc/wavlm/__init__.py
@@ -3,8 +3,8 @@
 import urllib.request

 import torch
+from trainer.io import get_user_data_dir

-from TTS.utils.generic_utils import get_user_data_dir
 from TTS.vc.modules.freevc.wavlm.wavlm import WavLM, WavLMConfig

 logger = logging.getLogger(__name__)
diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py
index 1f977755cc..8d4dd725ef 100644
--- a/TTS/vocoder/layers/losses.py
+++ b/TTS/vocoder/layers/losses.py
@@ -221,7 +221,7 @@ class GeneratorLoss(nn.Module):
     changing configurations.

     Args:
-        C (AttrDict): model configuration.
+        C (Coqpit): model configuration.
     """

     def __init__(self, C):
diff --git a/TTS/vocoder/models/gan.py b/TTS/vocoder/models/gan.py
index 9b6508d8ba..8792950a56 100644
--- a/TTS/vocoder/models/gan.py
+++ b/TTS/vocoder/models/gan.py
@@ -7,10 +7,10 @@
 from torch import nn
 from torch.utils.data import DataLoader
 from torch.utils.data.distributed import DistributedSampler
+from trainer.io import load_fsspec
 from trainer.trainer_utils import get_optimizer, get_scheduler

 from TTS.utils.audio import AudioProcessor
-from TTS.utils.io import load_fsspec
 from TTS.vocoder.datasets.gan_dataset import GANDataset
 from TTS.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss
 from TTS.vocoder.models import setup_discriminator, setup_generator
diff --git a/TTS/vocoder/models/hifigan_generator.py b/TTS/vocoder/models/hifigan_generator.py
index 083ce344fb..afdd59a859 100644
--- a/TTS/vocoder/models/hifigan_generator.py
+++ b/TTS/vocoder/models/hifigan_generator.py
@@ -7,8 +7,7 @@
 from torch.nn import functional as F
 from torch.nn.utils.parametrizations import weight_norm
 from torch.nn.utils.parametrize import remove_parametrizations
-
-from TTS.utils.io import load_fsspec
+from trainer.io import load_fsspec

 logger = logging.getLogger(__name__)

diff --git a/TTS/vocoder/models/melgan_generator.py b/TTS/vocoder/models/melgan_generator.py
index bb3fee789c..03c971afa4 100644
--- a/TTS/vocoder/models/melgan_generator.py
+++ b/TTS/vocoder/models/melgan_generator.py
@@ -1,8 +1,8 @@
 import torch
 from torch import nn
 from torch.nn.utils.parametrizations import weight_norm
+from trainer.io import load_fsspec

-from TTS.utils.io import load_fsspec
 from TTS.vocoder.layers.melgan import ResidualStack


diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py
index 96684d2a0a..6a4d4ca6e7 100644
--- a/TTS/vocoder/models/parallel_wavegan_generator.py
+++ b/TTS/vocoder/models/parallel_wavegan_generator.py
@@ -4,8 +4,8 @@
 import numpy as np
 import torch
 from torch.nn.utils.parametrize import remove_parametrizations
+from trainer.io import load_fsspec

-from TTS.utils.io import load_fsspec
 from TTS.vocoder.layers.parallel_wavegan import ResidualBlock
 from TTS.vocoder.layers.upsample import ConvUpsample

diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py
index 70d9edb342..c49abd2201 100644
--- a/TTS/vocoder/models/wavegrad.py
+++ b/TTS/vocoder/models/wavegrad.py
@@ -9,9 +9,9 @@
 from torch.nn.utils.parametrize import remove_parametrizations
 from torch.utils.data import DataLoader
 from torch.utils.data.distributed import DistributedSampler
+from trainer.io import load_fsspec
 from trainer.trainer_utils import get_optimizer, get_scheduler

-from TTS.utils.io import load_fsspec
 from TTS.vocoder.datasets import WaveGradDataset
 from TTS.vocoder.layers.wavegrad import Conv1d, DBlock, FiLM, UBlock
 from TTS.vocoder.models.base_vocoder import BaseVocoder
diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py
index 901afdff11..723f18dde2 100644
--- a/TTS/vocoder/models/wavernn.py
+++ b/TTS/vocoder/models/wavernn.py
@@ -10,11 +10,11 @@
 from torch import nn
 from torch.utils.data import DataLoader
 from torch.utils.data.distributed import DistributedSampler
+from trainer.io import load_fsspec

 from TTS.tts.utils.visual import plot_spectrogram
 from TTS.utils.audio import AudioProcessor
 from TTS.utils.audio.numpy_transforms import mulaw_decode
-from TTS.utils.io import load_fsspec
 from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset
 from TTS.vocoder.layers.losses import WaveRNNLoss
 from TTS.vocoder.models.base_vocoder import BaseVocoder
diff --git a/pyproject.toml b/pyproject.toml
index dad0d5ed0d..07f15d0595 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,7 @@
 [build-system]
 requires = [
     "setuptools",
+    "setuptools-scm",
     "cython~=0.29.30",
     "numpy>=2.0.0",
 ]
@@ -63,7 +64,7 @@ dependencies = [
     # Training
     "matplotlib>=3.7.0",
     # Coqui stack
-    "coqui-tts-trainer>=0.1",
+    "coqui-tts-trainer>=0.1.4",
     "coqpit>=0.0.16",
     # Gruut + supported languages
     "gruut[de,es,fr]==2.2.3",
@@ -73,7 +74,7 @@ dependencies = [
     # Bark
    "encodec>=0.1.1",
     # XTTS
-    "num2words",
+    "num2words>=0.5.11",
     "spacy[ja]>=3"
 ]

@@ -81,20 +82,20 @@ dependencies = [
 # Development dependencies
 dev = [
     "black==24.2.0",
-    "coverage[toml]",
-    "nose2",
-    "pre-commit",
+    "coverage[toml]>=7",
+    "nose2>=0.15",
+    "pre-commit>=3",
     "ruff==0.4.9",
-    "tomli; python_version < '3.11'",
+    "tomli>=2; python_version < '3.11'",
 ]
 # Dependencies for building the documentation
 docs = [
-    "furo",
+    "furo>=2023.5.20",
     "myst-parser==2.0.0",
     "sphinx==7.2.5",
-    "sphinx_inline_tabs",
-    "sphinx_copybutton",
-    "linkify-it-py",
+    "sphinx_inline_tabs>=2023.4.21",
+    "sphinx_copybutton>=0.1",
+    "linkify-it-py>=2.0.0",
 ]
 # Only used in notebooks
 notebooks = [
@@ -102,30 +103,30 @@ notebooks = [
     "pandas>=1.4,<2.0",
 ]
 # For running the TTS server
-server = ["flask>=2.0.1"]
+server = ["flask>=3.0.0"]
 # Language-specific dependencies, mainly for G2P
 # Bangla
 bn = [
-    "bangla",
-    "bnnumerizer",
-    "bnunicodenormalizer",
+    "bangla>=0.0.2",
+    "bnnumerizer>=0.0.2",
+    "bnunicodenormalizer>=0.1.0",
 ]
 # Korean
 ko = [
-    "hangul_romanize",
-    "jamo",
+    "hangul_romanize>=0.1.0",
+    "jamo>=0.4.1",
     "g2pkk>=0.1.1",
 ]
 # Japanese
 ja = [
-    "mecab-python3",
+    "mecab-python3>=1.0.2",
     "unidic-lite==1.0.8",
-    "cutlet",
+    "cutlet>=0.2.0",
 ]
 # Chinese
 zh = [
-    "jieba",
-    "pypinyin",
+    "jieba>=0.42.1",
+    "pypinyin>=0.40.0",
 ]
 # All language-specific dependencies
 languages = [
diff --git a/requirements.dev.txt b/requirements.dev.txt
index 1e4a7beff7..74ec0cd80c 100644
--- a/requirements.dev.txt
+++ b/requirements.dev.txt
@@ -1,8 +1,8 @@
 # Generated via scripts/generate_requirements.py and pre-commit hook.
 # Do not edit this file; modify pyproject.toml instead.
 black==24.2.0
-coverage[toml]
-nose2
-pre-commit
+coverage[toml]>=7
+nose2>=0.15
+pre-commit>=3
 ruff==0.4.9
-tomli; python_version < '3.11'
+tomli>=2; python_version < '3.11'
diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py
index 1c28e8609c..b944423988 100644
--- a/tests/zoo_tests/test_models.py
+++ b/tests/zoo_tests/test_models.py
@@ -4,11 +4,11 @@
 import shutil

 import torch
+from trainer.io import get_user_data_dir

 from tests import get_tests_data_path, get_tests_output_path, run_cli
 from TTS.tts.utils.languages import LanguageManager
 from TTS.tts.utils.speakers import SpeakerManager
-from TTS.utils.generic_utils import get_user_data_dir
 from TTS.utils.manage import ModelManager

 MODELS_WITH_SEP_TESTS = [
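
Note on the import migration in this patch: every call site that previously used `TTS.utils.io.load_fsspec`, `TTS.utils.io.load_checkpoint`, or `TTS.utils.generic_utils.get_user_data_dir` now imports the equivalent helper from `trainer.io`, provided by `coqui-tts-trainer>=0.1.4`, and docstrings that pointed at `TTS.utils.io.AttrDict` now reference `Coqpit`. Below is a minimal, hypothetical usage sketch for downstream code, assuming the `trainer.io` helpers keep the signatures of the removed functions; the checkpoint filename is a placeholder, not part of the patch.

    import torch
    from trainer.io import get_user_data_dir, load_fsspec

    # Remote checkpoints are cached under this directory when cache=True,
    # mirroring the deleted TTS/utils/io.py helper.
    cache_dir = get_user_data_dir("tts_cache")

    # load_fsspec behaves like torch.load but also accepts fsspec URLs
    # (e.g. s3://, gs://); "model_file.pth" is a placeholder path.
    state = load_fsspec("model_file.pth", map_location=torch.device("cpu"), cache=True)
    model_weights = state["model"]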