From 21e8e3959427186824b475d6a883a287078a1b22 Mon Sep 17 00:00:00 2001 From: Onako2 <79749977+Onako2@users.noreply.github.com> Date: Sat, 10 Feb 2024 15:36:58 +0100 Subject: [PATCH] chore(notebook): Just realised what happened --- notebooks/so-vits-svc-fork-4.0.ipynb | 656 ++++++++++++++------------- 1 file changed, 329 insertions(+), 327 deletions(-) diff --git a/notebooks/so-vits-svc-fork-4.0.ipynb b/notebooks/so-vits-svc-fork-4.0.ipynb index 3aaa7793..82916213 100644 --- a/notebooks/so-vits-svc-fork-4.0.ipynb +++ b/notebooks/so-vits-svc-fork-4.0.ipynb @@ -1,328 +1,330 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Before training\n", - "\n", - "This program saves the last 3 generations of models to Google Drive. Since 1 generation of models is >1GB, you should have at least 3GB of free space in Google Drive. If you do not have such free space, it is recommended to create another Google Account.\n", - "\n", - "Training requires >10GB VRAM. (T4 should be enough) Inference does not require such a lot of VRAM." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Check GPU\n", - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Mount Google Drive\n", - "from google.colab import drive\n", - "drive.mount('/content/drive')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Install dependencies\n", - "#@markdown pip may fail to resolve dependencies and raise ERROR, but it can be ignored.\n", - "!python -m pip install -U pip wheel\n", - "%pip install -U ipython \n", - "\n", - "#@markdown Branch (for development)\n", - "BRANCH = \"none\" #@param {\"type\": \"string\"}\n", - "if BRANCH == \"none\":\n", - " %pip install -U so-vits-svc-fork\n", - "else:\n", - " %pip install -U git+https://github.com/34j/so-vits-svc-fork.git@{BRANCH}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Make dataset directory\n", - "!mkdir -p \"dataset_raw\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#!rm -r \"dataset_raw\"\n", - "#!rm -r \"dataset/44k\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Copy your dataset\n", - "#@markdown **We assume that your dataset is in your Google Drive's `so-vits-svc-fork/dataset/(speaker_name)` directory.**\n", - "DATASET_NAME = \"kiritan\" #@param {type: \"string\"}\n", - "!cp -R /content/drive/MyDrive/so-vits-svc-fork/dataset/{DATASET_NAME}/ -t \"dataset_raw/\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Download dataset (Tsukuyomi-chan JVS)\n", - "#@markdown You can download this dataset if you don't have your own dataset.\n", - "#@markdown Make sure you agree to the license when using this dataset.\n", - "#@markdown https://tyc.rei-yumesaki.net/material/corpus/#toc6\n", - "# !wget https://tyc.rei-yumesaki.net/files/sozai-tyc-corpus1.zip\n", - "# !unzip sozai-tyc-corpus1.zip\n", - "# !mv \"/content/つくよみちゃんコーパス Vol.1 声優統計コーパス(JVSコーパス準拠)/おまけ:WAV(+12dB増幅&高音域削減)/WAV(+12dB増幅&高音域削減)\" \"dataset_raw/tsukuyomi\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Automatic preprocessing\n", - "!svc pre-resample" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!svc pre-config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Export configs file\n", - "#@markdown This assumes that you want to save the **config.json** on the default location. There will be also a backup file created in case the action is done accidentally.!cp configs/44k/config.json configs/44k/config.bkp.json\n", - "!cp drive/MyDrive/so-vits-svc-fork/config.json configs/44k\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Import configs file (Optional Step, NOT REQUIRED)\n", - "#@markdown This assumes that you are saving the **config.json** on the default location. There will be also a backup file created in case the action is done accidentally.\n", - "!cp drive/MyDrive/so-vits-svc-fork/config.json drive/MyDrive/so-vits-svc-fork/config.bkp.json\n", - "!cp configs/44k/config.json drive/MyDrive/so-vits-svc-fork\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "F0_METHOD = \"dio\" #@param [\"crepe\", \"crepe-tiny\", \"parselmouth\", \"dio\", \"harvest\"]\n", - "!svc pre-hubert -fm {F0_METHOD}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Train\n", - "%load_ext tensorboard\n", - "%tensorboard --logdir drive/MyDrive/so-vits-svc-fork/logs/44k\n", - "!svc train --model-path drive/MyDrive/so-vits-svc-fork/logs/44k" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Training Cluster model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!svc train-cluster --output-path drive/MyDrive/so-vits-svc-fork/logs/44k/kmeans.pt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inference" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Get the author's voice as a source\n", - "import random\n", - "NAME = str(random.randint(1, 49))\n", - "TYPE = \"fsd50k\" #@param [\"\", \"digit\", \"dog\", \"fsd50k\"]\n", - "CUSTOM_FILEPATH = \"\" #@param {type: \"string\"}\n", - "if CUSTOM_FILEPATH != \"\":\n", - " NAME = CUSTOM_FILEPATH\n", - "else:\n", - " # it is extremely difficult to find a voice that can download from the internet directly\n", - " if TYPE == \"dog\":\n", - " !wget -N f\"https://huggingface.co/datasets/437aewuh/dog-dataset/resolve/main/dogs/dogs_{NAME:.0000}.wav\" -O {NAME}.wav\n", - " elif TYPE == \"digit\":\n", - " # george, jackson, lucas, nicolas, ...\n", - " !wget -N f\"https://github.com/Jakobovski/free-spoken-digit-dataset/raw/master/recordings/0_george_{NAME}.wav\" -O {NAME}.wav\n", - " elif TYPE == \"fsd50k\":\n", - " !wget -N f\"https://huggingface.co/datasets/Fhrozen/FSD50k/blob/main/clips/dev/{10000+int(NAME)}.wav\" -O {NAME}.wav\n", - " else:\n", - " !wget -N f\"https://zunko.jp/sozai/utau/voice_{\"kiritan\" if NAME < 25 else \"itako\"}{NAME % 5 + 1}.wav\" -O {NAME}.wav\n", - "from IPython.display import Audio, display\n", - "display(Audio(f\"{NAME}.wav\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title Use trained model\n", - "#@markdown **Put your .wav file in `so-vits-svc-fork/audio` directory**\n", - "from IPython.display import Audio, display\n", - "!svc infer drive/MyDrive/so-vits-svc-fork/audio/{NAME}.wav -m drive/MyDrive/so-vits-svc-fork/logs/44k/ -c drive/MyDrive/so-vits-svc-fork/logs/44k/config.json\n", - "display(Audio(f\"drive/MyDrive/so-vits-svc-fork/audio/{NAME}.out.wav\", autoplay=True))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "##@title Use trained model (with cluster)\n", - "!svc infer {NAME}.wav -s speaker -r 0.1 -m drive/MyDrive/so-vits-svc-fork/logs/44k/ -c drive/MyDrive/so-vits-svc-fork/logs/44k/config.json -k drive/MyDrive/so-vits-svc-fork/logs/44k/kmeans.pt\n", - "display(Audio(f\"{NAME}.out.wav\", autoplay=True))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pretrained models" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/tree/main\n", - "!wget -N \"https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/resolve/main/riri/G_riri_220.pth\"\n", - "!wget -N \"https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/resolve/main/riri/config.json\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!svc infer {NAME}.wav -c config.json -m G_riri_220.pth\n", - "display(Audio(f\"{NAME}.out.wav\", autoplay=True))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#@title https://huggingface.co/therealvul/so-vits-svc-4.0/tree/main\n", - "!wget -N \"https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Pinkie%20(speaking%20sep)/G_166400.pth\"\n", - "!wget -N \"https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Pinkie%20(speaking%20sep)/config.json\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!svc infer {NAME}.wav --speaker \"Pinkie {neutral}\" -c config.json -m G_166400.pth\n", - "display(Audio(f\"{NAME}.out.wav\", autoplay=True))" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "provenance": [] - }, - "gpuClass": "standard", - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Before training\n", + "\n", + "This program saves the last 3 generations of models to Google Drive. Since 1 generation of models is >1GB, you should have at least 3GB of free space in Google Drive. If you do not have such free space, it is recommended to create another Google Account.\n", + "\n", + "Training requires >10GB VRAM. (T4 should be enough) Inference does not require such a lot of VRAM." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Check GPU\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Mount Google Drive\n", + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Install dependencies\n", + "#@markdown pip may fail to resolve dependencies and raise ERROR, but it can be ignored.\n", + "!python -m pip install -U pip wheel\n", + "%pip install -U ipython \n", + "\n", + "#@markdown Branch (for development)\n", + "BRANCH = \"none\" #@param {\"type\": \"string\"}\n", + "if BRANCH == \"none\":\n", + " %pip install -U so-vits-svc-fork\n", + "else:\n", + " %pip install -U git+https://github.com/34j/so-vits-svc-fork.git@{BRANCH}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Make dataset directory\n", + "!mkdir -p \"dataset_raw\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#!rm -r \"dataset_raw\"\n", + "#!rm -r \"dataset/44k\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Copy your dataset\n", + "#@markdown **We assume that your dataset is in your Google Drive's `so-vits-svc-fork/dataset/(speaker_name)` directory.**\n", + "DATASET_NAME = \"kiritan\" #@param {type: \"string\"}\n", + "!cp -R /content/drive/MyDrive/so-vits-svc-fork/dataset/{DATASET_NAME}/ -t \"dataset_raw/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Download dataset (Tsukuyomi-chan JVS)\n", + "#@markdown You can download this dataset if you don't have your own dataset.\n", + "#@markdown Make sure you agree to the license when using this dataset.\n", + "#@markdown https://tyc.rei-yumesaki.net/material/corpus/#toc6\n", + "# !wget https://tyc.rei-yumesaki.net/files/sozai-tyc-corpus1.zip\n", + "# !unzip sozai-tyc-corpus1.zip\n", + "# !mv \"/content/つくよみちゃんコーパス Vol.1 声優統計コーパス(JVSコーパス準拠)/おまけ:WAV(+12dB増幅&高音域削減)/WAV(+12dB増幅&高音域削減)\" \"dataset_raw/tsukuyomi\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Automatic preprocessing\n", + "!svc pre-resample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!svc pre-config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Export configs file\n", + "#@markdown This assumes that you want to save the **config.json** on the default location. There will be also a backup file created in case the action is done accidentally.\n", + "!cp drive/MyDrive/so-vits-svc-fork/config.json drive/MyDrive/so-vits-svc-fork/config.bkp.json\n", + "!cp configs/44k/config.json drive/MyDrive/so-vits-svc-fork\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Import configs file (Optional Step, NOT REQUIRED)\n", + "#@markdown This assumes that you are saving the **config.json** on the default location. There will be also a backup file created in case the action is done accidentally.\n", + "!cp configs/44k/config.json configs/44k/config.bkp.json\n", + "!cp drive/MyDrive/so-vits-svc-fork/config.json configs/44k\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "F0_METHOD = \"dio\" #@param [\"crepe\", \"crepe-tiny\", \"parselmouth\", \"dio\", \"harvest\"]\n", + "!svc pre-hubert -fm {F0_METHOD}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Train\n", + "%load_ext tensorboard\n", + "%tensorboard --logdir drive/MyDrive/so-vits-svc-fork/logs/44k\n", + "!svc train --model-path drive/MyDrive/so-vits-svc-fork/logs/44k" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training Cluster model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!svc train-cluster --output-path drive/MyDrive/so-vits-svc-fork/logs/44k/kmeans.pt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Get the author's voice as a source\n", + "import random\n", + "NAME = str(random.randint(1, 49))\n", + "TYPE = \"fsd50k\" #@param [\"\", \"digit\", \"dog\", \"fsd50k\"]\n", + "CUSTOM_FILEPATH = \"\" #@param {type: \"string\"}\n", + "if CUSTOM_FILEPATH != \"\":\n", + " NAME = CUSTOM_FILEPATH\n", + "else:\n", + " # it is extremely difficult to find a voice that can download from the internet directly\n", + " if TYPE == \"dog\":\n", + " !wget -N f\"https://huggingface.co/datasets/437aewuh/dog-dataset/resolve/main/dogs/dogs_{NAME:.0000}.wav\" -O {NAME}.wav\n", + " elif TYPE == \"digit\":\n", + " # george, jackson, lucas, nicolas, ...\n", + " !wget -N f\"https://github.com/Jakobovski/free-spoken-digit-dataset/raw/master/recordings/0_george_{NAME}.wav\" -O {NAME}.wav\n", + " elif TYPE == \"fsd50k\":\n", + " !wget -N f\"https://huggingface.co/datasets/Fhrozen/FSD50k/blob/main/clips/dev/{10000+int(NAME)}.wav\" -O {NAME}.wav\n", + " else:\n", + " !wget -N f\"https://zunko.jp/sozai/utau/voice_{\"kiritan\" if NAME < 25 else \"itako\"}{NAME % 5 + 1}.wav\" -O {NAME}.wav\n", + "from IPython.display import Audio, display\n", + "display(Audio(f\"{NAME}.wav\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title Use trained model\n", + "#@markdown **Put your .wav file in `so-vits-svc-fork/audio` directory**\n", + "from IPython.display import Audio, display\n", + "!svc infer drive/MyDrive/so-vits-svc-fork/audio/{NAME}.wav -m drive/MyDrive/so-vits-svc-fork/logs/44k/ -c drive/MyDrive/so-vits-svc-fork/logs/44k/config.json\n", + "display(Audio(f\"drive/MyDrive/so-vits-svc-fork/audio/{NAME}.out.wav\", autoplay=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "##@title Use trained model (with cluster)\n", + "!svc infer {NAME}.wav -s speaker -r 0.1 -m drive/MyDrive/so-vits-svc-fork/logs/44k/ -c drive/MyDrive/so-vits-svc-fork/logs/44k/config.json -k drive/MyDrive/so-vits-svc-fork/logs/44k/kmeans.pt\n", + "display(Audio(f\"{NAME}.out.wav\", autoplay=True))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pretrained models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/tree/main\n", + "!wget -N \"https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/resolve/main/riri/G_riri_220.pth\"\n", + "!wget -N \"https://huggingface.co/TachibanaKimika/so-vits-svc-4.0-models/resolve/main/riri/config.json\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!svc infer {NAME}.wav -c config.json -m G_riri_220.pth\n", + "display(Audio(f\"{NAME}.out.wav\", autoplay=True))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title https://huggingface.co/therealvul/so-vits-svc-4.0/tree/main\n", + "!wget -N \"https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Pinkie%20(speaking%20sep)/G_166400.pth\"\n", + "!wget -N \"https://huggingface.co/therealvul/so-vits-svc-4.0/resolve/main/Pinkie%20(speaking%20sep)/config.json\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!svc infer {NAME}.wav --speaker \"Pinkie {neutral}\" -c config.json -m G_166400.pth\n", + "display(Audio(f\"{NAME}.out.wav\", autoplay=True))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 + } +