diff --git a/README.md b/README.md
index 711a6ac3..769cfc62 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,7 @@ pip install so-vits-svc-fork
 ## Features not available in the original repo
 
 - **Realtime voice conversion**
+- GUI available
 - Unified command-line interface (no need to run Python scripts)
 - Ready to use just by installing with `pip`.
 - Automatically download pretrained base model and HuBERT model
@@ -51,7 +52,25 @@ pip install so-vits-svc-fork
 
 ## Usage
 
-### Realtime Voice conversion
+### Inference
+
+#### GUI
+
+![GUI](https://raw.githubusercontent.com/34j/so-vits-svc-fork/main/docs/_static/gui.png)
+
+```shell
+svcg
+```
+
+#### CLI
+
+- Realtime (from microphone)
 
 ```shell
 svc vc --model-path <model-path>
 ```
+
+- File
+
+```shell
+svc --model-path <model-path> source.wav
+```
@@ -70,11 +89,7 @@ svc pre-hubert
 svc train
 ```
 
-### Inference
-
-```shell
-svc --model-path <model-path> source.wav
-```
+### Further help
 
 For more details, run `svc -h` or `svc <subcommand> -h`.
diff --git a/docs/_static/gui.png b/docs/_static/gui.png
new file mode 100644
index 00000000..dbc93c0b
Binary files /dev/null and b/docs/_static/gui.png differ
diff --git a/poetry.lock b/poetry.lock
index c2ace889..df7f724a 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2578,6 +2578,18 @@ pytz = ">=2020.1"
 [package.extras]
 test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"]
 
+[[package]]
+name = "pebble"
+version = "5.0.3"
+description = "Threading and multiprocessing eye-candy."
+category = "main"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "Pebble-5.0.3-py3-none-any.whl", hash = "sha256:8274aa0959f387b368ede47666129cbe5d123f276a1bd9cafe77e020194b2141"},
+    {file = "Pebble-5.0.3.tar.gz", hash = "sha256:bdcfd9ea7e0aedb895b204177c19e6d6543d9962f4e3402ebab2175004863da8"},
+]
+
 [[package]]
 name = "pillow"
 version = "9.4.0"
@@ -3118,6 +3130,18 @@ files = [
     {file = "pyrsistent-0.19.3.tar.gz", hash = "sha256:1a2994773706bbb4995c31a97bc94f1418314923bd1048c6d964837040376440"},
 ]
 
+[[package]]
+name = "pysimplegui"
+version = "4.60.4"
+description = "Python GUIs for Humans. Launched in 2018. It's 2022 & PySimpleGUI is an ACTIVE & supported project. Super-simple to create custom GUI's. 325+ Demo programs & Cookbook for rapid start. Extensive documentation. Main docs at www.PySimpleGUI.org. Fun & your success are the focus. Examples using Machine Learning (GUI, OpenCV Integration), Rainmeter Style Desktop Widgets, Matplotlib + Pyplot, PIL support, add GUI to command line scripts, PDF & Image Viewers. Great for beginners & advanced GUI programmers."
+category = "main" +optional = false +python-versions = "*" +files = [ + {file = "PySimpleGUI-4.60.4-py3-none-any.whl", hash = "sha256:e133fbd21779f0f125cebbc2a4e1f5a931a383738661013ff33ad525d5611eda"}, + {file = "PySimpleGUI-4.60.4.tar.gz", hash = "sha256:f88c82c301a51aea35be605dc060bcceb0dcb6682e16280544884701ab4b23ba"}, +] + [[package]] name = "pysimplevalidate" version = "0.2.12" @@ -4670,4 +4694,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.11" -content-hash = "bd0a5148f6634dc9b2df2d30a8752d0de8dc72d509827ea6b4245e12bfb34060" +content-hash = "c484b8f4456aa9c2c6964b1173b94cfed86b84643fe10adf55fd49714bfc8a16" diff --git a/pyproject.toml b/pyproject.toml index 560ab76a..ecf11490 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,10 @@ packages = [ [tool.poetry.scripts] so-vits-svc-fork = "so_vits_svc_fork.__main__:cli" svc = "so_vits_svc_fork.__main__:cli" +svcf = "so_vits_svc_fork.__main__:cli" +svcg = "so_vits_svc_fork.gui:main" +svc-gui = "so_vits_svc_fork.gui:main" +svcf-gui = "so_vits_svc_fork.gui:main" [tool.poetry.urls] "Bug Tracker" = "https://github.com/34j/so-vits-svc-fork/issues" @@ -55,6 +59,8 @@ tqdm-joblib = "*" tensorboardx = "*" pyinputplus = "*" cm-time = "^0.1.2" +pysimplegui = ">=4.6" +pebble = "^5.0.3" [tool.poetry.group.dev.dependencies] pre-commit = ">=3" diff --git a/src/so_vits_svc_fork/__main__.py b/src/so_vits_svc_fork/__main__.py index 4f4b57ed..a07c7532 100644 --- a/src/so_vits_svc_fork/__main__.py +++ b/src/so_vits_svc_fork/__main__.py @@ -17,18 +17,22 @@ import torch from rich.logging import RichHandler -IN_COLAB = os.getenv("COLAB_RELEASE_TAG") - -basicConfig( - level=INFO, - format="%(asctime)s %(message)s", - datefmt="[%X]", - handlers=[ - RichHandler() if not IN_COLAB else StreamHandler(), - FileHandler(f"{__name__.split('.')[0]}.log"), - ], -) -captureWarnings(True) + +def init_logger() -> None: + IN_COLAB = os.getenv("COLAB_RELEASE_TAG") + + basicConfig( + level=INFO, + format="%(asctime)s %(message)s", + datefmt="[%X]", + handlers=[ + RichHandler() if not IN_COLAB else StreamHandler(), + FileHandler(f"{__name__.split('.')[0]}.log"), + ], + ) + captureWarnings(True) + + LOG = getLogger(__name__) @@ -48,6 +52,7 @@ def cli(): To train a model, run pre-resample, pre-config, pre-hubert, train. To infer a model, run infer. """ + init_logger() @click.help_option("--help", "-h") diff --git a/src/so_vits_svc_fork/gui.py b/src/so_vits_svc_fork/gui.py new file mode 100644 index 00000000..d09ca2b8 --- /dev/null +++ b/src/so_vits_svc_fork/gui.py @@ -0,0 +1,234 @@ +from logging import getLogger +from pathlib import Path + +import PySimpleGUI as sg +import sounddevice as sd +import soundfile as sf +from pebble import ProcessPool + +from .__main__ import init_logger + +LOG = getLogger(__name__) + +init_logger() + + +def play_audio(path: Path | str): + if isinstance(path, Path): + path = path.as_posix() + data, sr = sf.read(path) + sd.play(data, sr) + + +def main(): + sg.theme("Dark") + model_candidates = list(sorted(Path("./logs/44k/").glob("G_*.pth"))) + layout = [ + [ + sg.Text("Model path: "), + sg.InputText( + key="model_path", + default_text=model_candidates[-1].as_posix() + if model_candidates + else "", + ), + sg.FileBrowse( + initial_folder="./logs/44k/" if Path("./logs/44k/").exists() else "." 
+ ), + ], + [ + sg.Text("Config path: "), + sg.InputText( + key="config_path", + default_text="./configs/44k/config.json", + enable_events=True, + ), + sg.FileBrowse( + initial_folder="./configs/44k/" + if Path("./configs/44k/").exists() + else "." + ), + ], + [sg.Text("Speaker"), sg.Combo(values=[], key="speaker", size=(20, 1))], + [ + sg.Text("Input audio path:"), + sg.InputText(key="input_path"), + sg.FileBrowse(initial_folder="."), + sg.Button("Play", key="play_input"), + ], + [ + sg.Text("Silence threshold: "), + sg.Slider( + range=(-60.0, 0), + orientation="h", + key="silence_threshold", + default_value=-20, + resolution=0.1, + ), + ], + [ + sg.Checkbox( + key="auto_predict_f0", + default=True, + text="Auto predict F0 (Pitch may become unstable when turned on in real-time inference.)", + ) + ], + [ + sg.Text("Pitch: "), + sg.Slider( + range=(-20, 20), orientation="h", key="transpose", default_value=0 + ), + ], + [ + sg.Text("Cluster infer ratio: "), + sg.Slider( + range=(0, 1.0), + orientation="h", + key="cluster_infer_ratio", + default_value=0, + resolution=0.01, + ), + ], + [ + sg.Text("Cluster model path: "), + sg.InputText(key="cluster_model_path"), + sg.FileBrowse(), + ], + [ + sg.Text("Noise scale: "), + sg.Slider( + range=(0.0, 1.0), + orientation="h", + key="noise_scale", + default_value=0.4, + resolution=0.01, + ), + ], + [ + sg.Text("Pad seconds"), + sg.Slider( + range=(0.0, 1.0), + orientation="h", + key="pad_seconds", + default_value=0.1, + resolution=0.01, + ), + ], + [ + sg.Text("Crossfade seconds"), + sg.Slider( + range=(0, 0.6), + orientation="h", + key="crossfade_seconds", + default_value=0.1, + resolution=0.001, + ), + ], + [ + sg.Text("Block seconds"), + sg.Slider( + range=(0, 3.0), + orientation="h", + key="block_seconds", + default_value=1, + resolution=0.01, + ), + ], + [sg.Checkbox(key="use_gpu", default=True, text="Use GPU")], + [sg.Checkbox(key="auto_play", default=True, text="Auto play")], + [ + sg.Button("Infer", key="infer"), + sg.Button("(Re)Start Voice Changer", key="start_vc"), + sg.Button("Stop Voice Changer", key="stop_vc"), + ], + ] + + window = sg.Window( + f"{__name__.split('.')[0]}", layout + ) # , use_custom_titlebar=True) + with ProcessPool(max_workers=1) as pool: + future = None + while True: + event, values = window.read(100) + if event == sg.WIN_CLOSED: + break + + def update_combo() -> None: + from . 
+                from . import utils
+
+                if Path(values["config_path"]).exists():
+                    hp = utils.get_hparams_from_file(values["config_path"])
+                    LOG.info(f"Loaded config from {values['config_path']}")
+                    window["speaker"].update(
+                        values=list(hp.__dict__["spk"].keys()), set_to_index=0
+                    )
+
+            if event != sg.EVENT_TIMEOUT:
+                LOG.info(f"Event: {event}, values: {values}")
+            if values["speaker"] == "":
+                update_combo()
+
+            if event == "config_path":
+                update_combo()
+            elif event == "infer":
+                from .inference_main import infer
+
+                input_path = Path(values["input_path"])
+                output_path = (
+                    input_path.parent / f"{input_path.stem}.out{input_path.suffix}"
+                )
+                infer(
+                    model_path=Path(values["model_path"]),
+                    config_path=Path(values["config_path"]),
+                    input_path=input_path,
+                    output_path=output_path,
+                    speaker=values["speaker"],
+                    cluster_model_path=Path(values["cluster_model_path"])
+                    if values["cluster_model_path"]
+                    else None,
+                    transpose=values["transpose"],
+                    auto_predict_f0=values["auto_predict_f0"],
+                    cluster_infer_ratio=values["cluster_infer_ratio"],
+                    noise_scale=values["noise_scale"],
+                    db_thresh=values["silence_threshold"],
+                    pad_seconds=values["pad_seconds"],
+                    device="cuda" if values["use_gpu"] else "cpu",
+                )
+                if values["auto_play"]:
+                    pool.schedule(play_audio, args=[output_path])
+            elif event == "play_input":
+                if Path(values["input_path"]).exists():
+                    pool.schedule(play_audio, args=[Path(values["input_path"])])
+            elif event == "start_vc":
+                from .inference_main import realtime
+
+                if future:
+                    LOG.info("Canceling previous task")
+                    future.cancel()
+                future = pool.schedule(
+                    realtime,
+                    kwargs=dict(
+                        model_path=Path(values["model_path"]),
+                        config_path=Path(values["config_path"]),
+                        speaker=values["speaker"],
+                        cluster_model_path=Path(values["cluster_model_path"])
+                        if values["cluster_model_path"]
+                        else None,
+                        transpose=values["transpose"],
+                        auto_predict_f0=values["auto_predict_f0"],
+                        cluster_infer_ratio=values["cluster_infer_ratio"],
+                        noise_scale=values["noise_scale"],
+                        crossfade_seconds=values["crossfade_seconds"],
+                        db_thresh=values["silence_threshold"],
+                        pad_seconds=values["pad_seconds"],
+                        device="cuda" if values["use_gpu"] else "cpu",
+                        block_seconds=values["block_seconds"],
+                    ),
+                )
+            elif event == "stop_vc":
+                if future:
+                    future.cancel()
+                    future = None
+        if future:
+            future.cancel()
+    window.close()
diff --git a/src/so_vits_svc_fork/inference/infer_tool.py b/src/so_vits_svc_fork/inference/infer_tool.py
index 0458af2e..1217f00a 100644
--- a/src/so_vits_svc_fork/inference/infer_tool.py
+++ b/src/so_vits_svc_fork/inference/infer_tool.py
@@ -119,13 +119,19 @@ def infer(
     ):
         audio = audio.astype(np.float32)
         # get speaker id
-        speaker_id = self.spk2id.__dict__.get(speaker)
-        if not speaker_id and isinstance(speaker, int):
+        if isinstance(speaker, int):
             if speaker < len(self.spk2id.__dict__):
                 speaker_id = speaker
+            else:
+                raise ValueError(
+                    f"Speaker id {speaker} >= number of speakers {len(self.spk2id.__dict__)}"
+                )
         else:
-            LOG.warning(f"Speaker {speaker} is not found. Use speaker 0 instead.")
-            speaker_id = 0
+            if speaker in self.spk2id.__dict__:
+                speaker_id = self.spk2id.__dict__[speaker]
+            else:
+                LOG.warning(f"Speaker {speaker} is not found. Use speaker 0 instead.")
+                speaker_id = 0
         sid = torch.LongTensor([int(speaker_id)]).to(self.dev).unsqueeze(0)
 
         # get unit f0
@@ -167,7 +173,7 @@ def infer_silence(
         # slice config
         db_thresh: int = -40,
         pad_seconds: float = 0.5,
-        fade_seconds: float = 0.04,
+        # fade_seconds: float = 0.0,
     ) -> np.ndarray[Any, np.dtype[np.float32]]:
         chunks = slicer.cut(audio, self.target_sample, db_thresh=db_thresh)
         LOG.info(f"Cut audio into chunks {chunks}")
@@ -197,9 +203,9 @@ def infer_silence(
             _audio = _audio[pad_len:-pad_len]
 
             # add fade
-            fade_len = int(self.target_sample * fade_seconds)
-            _audio[:fade_len] = _audio[:fade_len] * np.linspace(0, 1, fade_len)
-            _audio[-fade_len:] = _audio[-fade_len:] * np.linspace(1, 0, fade_len)
+            # fade_len = int(self.target_sample * fade_seconds)
+            # _audio[:fade_len] = _audio[:fade_len] * np.linspace(0, 1, fade_len)
+            # _audio[-fade_len:] = _audio[-fade_len:] * np.linspace(1, 0, fade_len)
 
             result_audio = np.concatenate([result_audio, pad_array(_audio, length)])
         result_audio = result_audio[: audio.shape[0]]
         return result_audio
@@ -238,6 +244,15 @@ def process(
         db_thresh: int = -40,
         pad_seconds: float = 0.5,
     ):
+        """
+        chunks        : ■■■■■■□□□□□□
+        add last input:□■■■■■■
+                        ■□□□□□□
+        infer         :□■■■■■■
+                        ■□□□□□□
+        crossfade     :▲■■■■■
+                        ▲□□□□□
+        """
         if input_audio.ndim != 1:
             raise ValueError("Input audio must be 1-dimensional.")
         if input_audio.shape[0] < self.crossfade_len:
@@ -286,15 +301,14 @@ def process(
                 noise_scale=noise_scale,
             )
             infered_audio_c = infered_audio_c.cpu().numpy()
-            infered_audio_c = infered_audio_c
             LOG.info(f"Concentrated Inferred shape: {infered_audio_c.shape}")
             assert infered_audio_c.shape[0] == input_audio_c.shape[0]
 
             # crossfade
             result = maad.util.crossfade(
                 self.last_infered, infered_audio_c, 1, self.crossfade_len
-            )[: input_audio.shape[0]]
+            )[-(input_audio.shape[0] + self.crossfade_len) : -self.crossfade_len]
             LOG.info(f"Result shape: {result.shape}")
             assert result.shape[0] == input_audio.shape[0]
-            self.last_infered = infered_audio_c
+            self.last_infered = infered_audio_c[-self.crossfade_len - 1 :].copy()
             return result
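
Review note: the subtle part of this diff is the new tail-anchored slice in `process()` (`[-(input_audio.shape[0] + self.crossfade_len) : -self.crossfade_len]`) combined with keeping only the last `crossfade_len + 1` inferred samples for the next call. Below is a minimal numpy sketch of that bookkeeping as diagrammed in the new docstring; `crossfade_chunks`, `prev_tail`, `new_chunk`, `block_len`, and the linear fade are illustrative assumptions, not the `maad.util.crossfade` API the code actually calls.

```python
import numpy as np


def crossfade_chunks(
    prev_tail: np.ndarray, new_chunk: np.ndarray, fade_len: int
) -> np.ndarray:
    # Linearly blend the tail of the previously inferred chunk into the
    # head of the freshly inferred one over fade_len samples.
    fade_in = np.linspace(0.0, 1.0, fade_len)
    out = new_chunk.copy()
    out[:fade_len] = (
        prev_tail[-fade_len:] * (1.0 - fade_in) + new_chunk[:fade_len] * fade_in
    )
    return out


# Per realtime block, mirroring the new process() logic:
#   inferred     = infer(concat(last_input, new_block))      # "add last input" row
#   blended      = crossfade_chunks(last_tail, inferred, fade_len)
#   emitted      = blended[-(block_len + fade_len):-fade_len]  # the new slice
#   last_tail    = inferred[-fade_len - 1:]                  # saved for the next blend
```

The emitted window deliberately stops `crossfade_len` samples short of the end: that tail is only played on the next call, after being blended with the head of the next inferred chunk, so consecutive blocks join smoothly instead of being cut at a raw chunk boundary as with the old `[: input_audio.shape[0]]` slice.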