Merge remote-tracking branch 'upstream/main'

Mangio621 · Mangio621 · commit 43b2f41daffe · 2023-06-19T01:43:09.000+10:00
diff --git a/Changelog_CN.md b/Changelog_CN.md
@@ -1,3 +1,24 @@
+### 20230618更新
+- v2增加32k和48k两个新预训练模型
+- 修复非f0模型推理报错
+- 对于超过一小时的训练集的索引建立环节，自动kmeans缩小特征处理以加速索引训练、加入和查询
+- 附送一个人声转吉他玩具仓库
+- 数据处理剔除异常值切片
+- onnx导出选项卡
+
+失败的实验：
+- ~~特征检索增加时序维度：寄，没啥效果~~
+- ~~特征检索增加PCAR降维可选项：寄，数据大用kmeans缩小数据量，数据小降维操作耗时比省下的匹配耗时还多~~
+- ~~支持onnx推理（附带仅推理的小压缩包）：寄，生成nsf还是需要pytorch~~
+- ~~训练时在音高、gender、eq、噪声等方面对输入进行随机增强：寄，没啥效果~~
+
+todolist：
+- 接入小型声码器调研
+- 训练集音高识别支持crepe
+- crepe的精度支持和RVC-config同步
+- 对接F0编辑器
+  
+  
 ### 20230528更新
 - 增加v2的jupyter notebook，韩文changelog，增加一些环境依赖
 - 增加呼吸、清辅音、齿音保护模式
@@ -7,15 +28,6 @@
 - 人声伴奏分离、推理批量导出增加音频导出格式选项
 - 废弃32k模型的训练
 
-todolist：
-- ~~特征检索增加时序维度：寄，没啥效果~~
-- 特征检索增加pre-kmeans可选项
-- ~~特征检索增加PCAR降维可选项：寄，数据大用kmeans缩小数据量，数据小降维操作耗时比省下的匹配耗时还多~~
-- ~~支持onnx推理（附带仅推理的小压缩包）：寄，生成nsf还是需要pytorch~~
-- ~~训练时在音高、gender、eq、噪声等方面对输入进行随机增强：寄，没啥效果~~
-- 补全v2版本的48k预训练模型
-
-
 ### 20230513更新
 - 清除一键包内部老版本runtime内残留的infer_pack和uvr5_pack
 - 修复训练集预处理伪多进程的bug
diff --git a/Changelog_EN.md b/Changelog_EN.md
@@ -1,19 +1,30 @@
+### 2023-06-18
+- New pretrained v2 models: 32k and 48k
+- Fix non-f0 model inference errors
+- For training sets longer than 1 hour, automatically apply minibatch k-means to reduce the number of features, so that index training, adding, and searching are much faster
+- Provide a toy vocal2guitar huggingface space
+- Automatically remove outlier slices from the training-set audio during data processing
+- Onnx export tab
+
+Failed experiments:
+- ~~Feature retrieval: add temporal feature retrieval: not effective~~
+- ~~Feature retrieval: add PCAR dimensionality reduction: searching is even slower~~
+- ~~Random data augmentation when training: not effective~~
+
+todolist:
+- Vocos-RVC (tiny vocoder)
+- Crepe support for training
+- Half precision crepe inference
+- F0 editor support
+
 ### 2023-05-28
 - Add v2 jupyter notebook, korean changelog, fix some environment requirments
 - Add voiceless consonant and breath protection mode
 - Support crepe-full pitch detect
 - UVR5 vocal separation: support dereverb models and de-echo models
 - Add experiment name and version on the name of index
 - Support users to manually select export format of output audios when batch voice conversion processing and UVR5 vocal separation
-- 32k model training is no more supported
-
-todolist：
-- Feature retrieval: add temporal feature retrieval
-- Feature retrieval: add pre-kmeans option
-- Feature retrieval: add PCAR dimensionality reduction
-- Add onnx inference support
-- Random data augmentation when training: pitch, gender, eq, noise
-- Add v2 version pretrained-models
+- v1 32k model training is no longer supported
 
 ### 2023-05-13
 - Clear the redundant codes in the old version of runtime in the one-click-package: infer_pack and uvr5_pack
diff --git a/infer-web.py b/infer-web.py
@@ -666,12 +666,12 @@ def change_sr2(sr2, if_f0_3, version19):
     if_pretrained_discriminator_exist = os.access(
         "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
     )
-    if if_pretrained_generator_exist is not False:
+    if not if_pretrained_generator_exist:
         print(
             "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2),
             "not exist, will not use pretrained model",
         )
-    if if_pretrained_discriminator_exist is not False:
+    if not if_pretrained_discriminator_exist:
         print(
             "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2),
             "not exist, will not use pretrained model",
@@ -683,7 +683,6 @@ def change_sr2(sr2, if_f0_3, version19):
         "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
         if if_pretrained_discriminator_exist
         else "",
-        {"visible": True, "__type__": "update"},
     )
 
 
@@ -692,9 +691,9 @@ def change_version19(sr2, if_f0_3, version19):
     if sr2 == "32k" and version19 == "v1":
         sr2 = "40k"
     to_return_sr2 = (
-        {"choices": ["40k", "48k"], "__type__": "update"}
+        {"choices": ["40k", "48k"], "__type__": "update", "value": sr2}
         if version19 == "v1"
-        else {"choices": ["32k", "40k", "48k"], "__type__": "update"}
+        else {"choices": ["40k", "48k", "32k"], "__type__": "update", "value": sr2}
     )
     f0_str = "f0" if if_f0_3 else ""
     if_pretrained_generator_exist = os.access(
@@ -2106,7 +2105,7 @@ def get_presets():
                     sr2.change(
                         change_sr2,
                         [sr2, if_f0_3, version19],
-                        [pretrained_G14, pretrained_D15, version19],
+                        [pretrained_G14, pretrained_D15],
                     )
                     version19.change(
                         change_version19,
@@ -2289,7 +2288,7 @@ def get_presets():
                     version_1 = gr.Radio(
                         label=i18n("模型版本型号"),
                         choices=["v1", "v2"],
-                        value="v1",
+                        value="v2",
                         interactive=True,
                     )
                     info___ = gr.Textbox(
diff --git a/train/utils.py b/train/utils.py
@@ -360,7 +360,7 @@ def get_hparams(init=True):
     if not os.path.exists(experiment_dir):
         os.makedirs(experiment_dir)
 
-    if(args.version=="v1"or args.sample_rate=="40k"):
+    if args.version == "v1" or args.sample_rate == "40k":
         config_path = "configs/%s.json" % args.sample_rate
     else:
         config_path = "configs/%s_v2.json" % args.sample_rate