Skip to content

Commit

Permalink
feat: add gui (#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
34j authored Mar 17, 2023
1 parent 7b74606 commit 34aec2b
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 30 deletions.
27 changes: 21 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,33 @@ pip install so-vits-svc-fork
## Features not available in the original repo

- **Realtime voice conversion**
- GUI available
- Unified command-line interface (no need to run Python scripts)
- Ready to use just by installing with `pip`.
- Automatically download pretrained base model and HuBERT model
- Code completely formatted with black, isort, autoflake etc.

## Usage

### Realtime Voice conversion
### Inference

#### GUI

![GUI](https://raw.githubusercontent.com/34j/so-vits-svc-fork/main/docs/_static/gui.png)

```shell
svcg
```

#### CLI

- Realtime (from microphone)

```shell
svc vc --model-path <model-path>
```

- File

```shell
svc --model-path <model-path> source.wav
Expand All @@ -70,11 +89,7 @@ svc pre-hubert
svc train
```

### Inference

```shell
svc --model-path <model-path> source.wav
```
### Further help

For more details, run `svc -h` or `svc <subcommand> -h`.

Expand Down
Binary file added docs/_static/gui.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 25 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ packages = [
[tool.poetry.scripts]
so-vits-svc-fork = "so_vits_svc_fork.__main__:cli"
svc = "so_vits_svc_fork.__main__:cli"
svcf = "so_vits_svc_fork.__main__:cli"
svcg = "so_vits_svc_fork.gui:main"
svc-gui = "so_vits_svc_fork.gui:main"
svcf-gui = "so_vits_svc_fork.gui:main"

[tool.poetry.urls]
"Bug Tracker" = "https://github.com/34j/so-vits-svc-fork/issues"
Expand Down Expand Up @@ -55,6 +59,8 @@ tqdm-joblib = "*"
tensorboardx = "*"
pyinputplus = "*"
cm-time = "^0.1.2"
pysimplegui = ">=4.6"
pebble = "^5.0.3"

[tool.poetry.group.dev.dependencies]
pre-commit = ">=3"
Expand Down
29 changes: 17 additions & 12 deletions src/so_vits_svc_fork/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,22 @@
import torch
from rich.logging import RichHandler

IN_COLAB = os.getenv("COLAB_RELEASE_TAG")

basicConfig(
level=INFO,
format="%(asctime)s %(message)s",
datefmt="[%X]",
handlers=[
RichHandler() if not IN_COLAB else StreamHandler(),
FileHandler(f"{__name__.split('.')[0]}.log"),
],
)
captureWarnings(True)

def init_logger() -> None:
    """Set up root logging for the package.

    Console output goes through ``RichHandler`` unless the
    ``COLAB_RELEASE_TAG`` environment variable is set, in which case a plain
    ``StreamHandler`` is used instead. Records are also written to a log file
    named after the first dotted component of this module's ``__name__``.
    Warnings issued via the ``warnings`` module are redirected to logging.
    """
    in_colab = os.getenv("COLAB_RELEASE_TAG")

    # Pick the console handler first, then open the log file.
    console_handler = StreamHandler() if in_colab else RichHandler()
    log_file_handler = FileHandler(f"{__name__.split('.')[0]}.log")

    basicConfig(
        level=INFO,
        format="%(asctime)s %(message)s",
        datefmt="[%X]",
        handlers=[console_handler, log_file_handler],
    )
    captureWarnings(True)


LOG = getLogger(__name__)


Expand All @@ -48,6 +52,7 @@ def cli():
To train a model, run pre-resample, pre-config, pre-hubert, train.
To infer a model, run infer.
"""
init_logger()


@click.help_option("--help", "-h")
Expand Down
234 changes: 234 additions & 0 deletions src/so_vits_svc_fork/gui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
from logging import getLogger
from pathlib import Path

import PySimpleGUI as sg
import sounddevice as sd
import soundfile as sf
from pebble import ProcessPool

from .__main__ import init_logger

LOG = getLogger(__name__)

init_logger()


def play_audio(path: Path | str):
    """Read the audio file at *path* and hand it to ``sounddevice`` for playback.

    Args:
        path: Location of the audio file; a ``Path`` is converted to its
            POSIX string form before being passed to ``soundfile``.
    """
    filename = path.as_posix() if isinstance(path, Path) else path
    samples, sample_rate = sf.read(filename)
    sd.play(samples, sample_rate)


def _build_layout() -> list:
    """Return the PySimpleGUI layout (a list of element rows) for the main window."""
    # Default to the last generator checkpoint in sorted filename order, if any.
    model_candidates = sorted(Path("./logs/44k/").glob("G_*.pth"))
    return [
        [
            sg.Text("Model path: "),
            sg.InputText(
                key="model_path",
                default_text=model_candidates[-1].as_posix()
                if model_candidates
                else "",
            ),
            sg.FileBrowse(
                initial_folder="./logs/44k/" if Path("./logs/44k/").exists() else "."
            ),
        ],
        [
            sg.Text("Config path: "),
            sg.InputText(
                key="config_path",
                default_text="./configs/44k/config.json",
                # Emit an event on every edit so the speaker list can follow.
                enable_events=True,
            ),
            sg.FileBrowse(
                initial_folder="./configs/44k/"
                if Path("./configs/44k/").exists()
                else "."
            ),
        ],
        [sg.Text("Speaker"), sg.Combo(values=[], key="speaker", size=(20, 1))],
        [
            sg.Text("Input audio path:"),
            sg.InputText(key="input_path"),
            sg.FileBrowse(initial_folder="."),
            sg.Button("Play", key="play_input"),
        ],
        [
            sg.Text("Silence threshold: "),
            sg.Slider(
                range=(-60.0, 0),
                orientation="h",
                key="silence_threshold",
                default_value=-20,
                resolution=0.1,
            ),
        ],
        [
            sg.Checkbox(
                key="auto_predict_f0",
                default=True,
                text="Auto predict F0 (Pitch may become unstable when turned on in real-time inference.)",
            )
        ],
        [
            sg.Text("Pitch: "),
            sg.Slider(
                range=(-20, 20), orientation="h", key="transpose", default_value=0
            ),
        ],
        [
            sg.Text("Cluster infer ratio: "),
            sg.Slider(
                range=(0, 1.0),
                orientation="h",
                key="cluster_infer_ratio",
                default_value=0,
                resolution=0.01,
            ),
        ],
        [
            sg.Text("Cluster model path: "),
            sg.InputText(key="cluster_model_path"),
            sg.FileBrowse(),
        ],
        [
            sg.Text("Noise scale: "),
            sg.Slider(
                range=(0.0, 1.0),
                orientation="h",
                key="noise_scale",
                default_value=0.4,
                resolution=0.01,
            ),
        ],
        [
            sg.Text("Pad seconds"),
            sg.Slider(
                range=(0.0, 1.0),
                orientation="h",
                key="pad_seconds",
                default_value=0.1,
                resolution=0.01,
            ),
        ],
        [
            sg.Text("Crossfade seconds"),
            sg.Slider(
                range=(0, 0.6),
                orientation="h",
                key="crossfade_seconds",
                default_value=0.1,
                resolution=0.001,
            ),
        ],
        [
            sg.Text("Block seconds"),
            sg.Slider(
                range=(0, 3.0),
                orientation="h",
                key="block_seconds",
                default_value=1,
                resolution=0.01,
            ),
        ],
        [sg.Checkbox(key="use_gpu", default=True, text="Use GPU")],
        [sg.Checkbox(key="auto_play", default=True, text="Auto play")],
        [
            sg.Button("Infer", key="infer"),
            sg.Button("(Re)Start Voice Changer", key="start_vc"),
            sg.Button("Stop Voice Changer", key="stop_vc"),
        ],
    ]


def _shared_inference_kwargs(values: dict) -> dict:
    """Map the current widget values to the keyword arguments that both
    ``infer`` and ``realtime`` are called with."""
    return dict(
        model_path=Path(values["model_path"]),
        config_path=Path(values["config_path"]),
        speaker=values["speaker"],
        cluster_model_path=Path(values["cluster_model_path"])
        if values["cluster_model_path"]
        else None,
        transpose=values["transpose"],
        auto_predict_f0=values["auto_predict_f0"],
        cluster_infer_ratio=values["cluster_infer_ratio"],
        noise_scale=values["noise_scale"],
        db_thresh=values["silence_threshold"],
        pad_seconds=values["pad_seconds"],
        device="cuda" if values["use_gpu"] else "cpu",
    )


def _refresh_speakers(window, config_path: str) -> None:
    """Fill the "speaker" combo box from the ``spk`` entry of the config file.

    Does nothing unless *config_path* names an existing regular file.
    ``is_file()`` is used rather than ``exists()`` because ``Path("")`` is the
    current directory (which exists), and because the config field fires an
    event on every keystroke, so half-typed directory names pass through here.
    """
    from . import utils

    if not Path(config_path).is_file():
        return
    hp = utils.get_hparams_from_file(config_path)
    LOG.info(f"Loaded config from {config_path}")
    window["speaker"].update(values=list(hp.__dict__["spk"].keys()), set_to_index=0)


def _run_event_loop(window, pool) -> None:
    """Dispatch window events until the window is closed.

    File inference runs in this process; audio playback and the realtime
    voice changer are scheduled on *pool*. The pending voice-changer task,
    if any, is cancelled when the loop ends for any reason.
    """
    future = None
    try:
        while True:
            # Poll with a 100 ms timeout so the speaker list can be filled in
            # even before the user interacts with the window.
            event, values = window.read(100)
            if event == sg.WIN_CLOSED:
                break

            if event != sg.EVENT_TIMEOUT:
                LOG.info(f"Event: {event}, values: {values}")
            if values["speaker"] == "":
                _refresh_speakers(window, values["config_path"])

            if event == "config_path":
                _refresh_speakers(window, values["config_path"])
            elif event == "infer":
                from .inference_main import infer

                input_path = Path(values["input_path"])
                if not input_path.is_file():
                    # An empty or wrong path would otherwise go straight to
                    # infer(); report it and keep the GUI alive.
                    LOG.warning(f"Input audio file not found: {input_path}")
                else:
                    output_path = (
                        input_path.parent
                        / f"{input_path.stem}.out{input_path.suffix}"
                    )
                    infer(
                        input_path=input_path,
                        output_path=output_path,
                        **_shared_inference_kwargs(values),
                    )
                    if values["auto_play"]:
                        pool.schedule(play_audio, args=[output_path])
            elif event == "play_input":
                if Path(values["input_path"]).is_file():
                    pool.schedule(play_audio, args=[Path(values["input_path"])])
            elif event == "start_vc":
                from .inference_main import realtime

                # (Re)start: drop any voice changer that is already scheduled.
                if future is not None:
                    LOG.info("Canceling previous task")
                    future.cancel()
                future = pool.schedule(
                    realtime,
                    kwargs=dict(
                        crossfade_seconds=values["crossfade_seconds"],
                        block_seconds=values["block_seconds"],
                        **_shared_inference_kwargs(values),
                    ),
                )
            elif event == "stop_vc":
                if future is not None:
                    future.cancel()
                    future = None
    finally:
        # Cancel before the pool's context manager exits, on errors as well
        # as on a normal window close.
        if future is not None:
            future.cancel()


def main() -> None:
    """Entry point of the GUI: build the window and run its event loop.

    A single-worker ``ProcessPool`` is created for background tasks. The
    window is closed on exit even if the event loop raises.
    """
    # The theme must be selected before any element is created.
    sg.theme("Dark")
    window = sg.Window(f"{__name__.split('.')[0]}", _build_layout())
    try:
        with ProcessPool(max_workers=1) as pool:
            _run_event_loop(window, pool)
    finally:
        window.close()
Loading

0 comments on commit 34aec2b

Please sign in to comment.