diff --git a/Dockerfile.mac b/Dockerfile.mac
new file mode 100644
index 000000000..596ae5679
--- /dev/null
+++ b/Dockerfile.mac
@@ -0,0 +1,37 @@
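+# CPU-only image intended for macOS hosts (run under linux/amd64 emulation, e.g. via Docker Desktop).
+# Example build/run (image tag is illustrative): docker build -f Dockerfile.mac -t rvc-webui . && docker run -p 7865:7865 rvc-webui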
+FROM python:3.10-slim
+
+EXPOSE 7865
+
+WORKDIR /app
+
+RUN apt-get update && \
+ apt-get install -y -qq ffmpeg aria2 git build-essential && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+
+COPY requirements.txt .
+
+RUN pip install --upgrade "pip<24.1" && \
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \
+ pip install --no-cache-dir -r requirements.txt && \
+ pip install fairseq==0.12.2 && \
+ pip install gradio==3.34.0 gradio-client==0.2.7
+
+COPY . .
+
+
+RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d assets/pretrained_v2/ -o D40k.pth && \
+ aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d assets/pretrained_v2/ -o G40k.pth && \
+ aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -d assets/pretrained_v2/ -o f0D40k.pth && \
+ aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth -d assets/pretrained_v2/ -o f0G40k.pth
+
+RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d assets/hubert -o hubert_base.pt && \
+ aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d assets/rmvpe -o rmvpe.pt
+
+RUN aria2c --console-log-level=error -c -x 16 -s 16 -k 1M "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-%E4%BA%BA%E5%A3%B0vocals%2B%E9%9D%9E%E4%BA%BA%E5%A3%B0instrumentals.pth" -d assets/uvr5_weights/ -o "HP2-人声vocals+非人声instrumentals.pth" && \
+ aria2c --console-log-level=error -c -x 16 -s 16 -k 1M "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-%E4%B8%BB%E6%97%8B%E5%BE%8B%E4%BA%BA%E5%A3%B0vocals%2B%E5%85%B6%E4%BB%96instrumentals.pth" -d assets/uvr5_weights/ -o "HP5-主旋律人声vocals+其他instrumentals.pth"
+
+VOLUME [ "/app/weights", "/app/logs", "/app/assets/weights" ]
+
+CMD ["python", "infer-web.py"]
diff --git a/configs/config.py b/configs/config.py
index a330fb543..47edad7f5 100644
--- a/configs/config.py
+++ b/configs/config.py
@@ -167,7 +167,8 @@ def device_config(self) -> tuple:
self.preprocess_per = 3.0
elif self.has_mps():
logger.info("No supported Nvidia GPU found")
- self.device = self.instead = "mps"
+ logger.info("MPS available but using CPU for stability")
+ self.device = self.instead = "cpu"
self.is_half = False
self.use_fp32_config()
else:
diff --git a/datasets/README.md b/datasets/README.md
new file mode 100644
index 000000000..554aca12f
--- /dev/null
+++ b/datasets/README.md
@@ -0,0 +1,36 @@
+# Voice Datasets
+
+This directory contains the audio datasets for training custom RVC models.
+
+## Structure
+
+Each subdirectory corresponds to a specific voice type:
+
+- `male_low/`: Bass/Baritone male voices
+- `male_mid/`: Tenor/Mid-range male voices
+- `female_low/`: Alto/Contralto female voices
+- `female_high/`: Soprano/High-range female voices
+- `anime_airy/`: Breathy/airy anime-style voices
+- `accent_non_native/`: Voices with distinct non-native accents
+- `singing_male/`: Male singing vocals
+- `singing_female/`: Female singing vocals
+- `child/`: Child voices
+- `elderly/`: Elderly voices
+
+## How to Add Data
+
+1. **Collect Audio**: Gather 10-15 minutes of clean, single-speaker audio for the desired category.
+2. **Place Files**: Put the raw audio files (MP3, WAV, etc.) into a temporary folder or directly into this directory.
+3. **Process**: Use the provided tool to normalize and split the audio.
+
+```bash
+# Example: Processing a raw file into the male_low dataset
+python tools/audio_preprocessor.py -i raw_audio/my_voice.mp3 -o datasets/male_low
+```
+
+## Requirements
+
+- **Format**: WAV (will be converted automatically; see the optional sketch below)
+- **Sample Rate**: 40kHz or 48kHz (will be converted automatically)
+- **Channels**: Mono (will be converted automatically)
+- **Quality**: No background noise, music, or reverb. Use UVR5 to clean if necessary.
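+
+The preprocessor applies these conversions for you. If you prefer to pre-convert files yourself, a minimal sketch (assuming `librosa` and `soundfile` are installed; the file paths are illustrative) might look like:
+
+```python
+import librosa
+import soundfile as sf
+
+# Load any decodable audio file as mono, resampled to 40 kHz
+audio, sr = librosa.load("raw_audio/my_voice.mp3", sr=40000, mono=True)
+
+# Write a 16-bit PCM WAV into the target dataset folder
+sf.write("datasets/male_low/my_voice.wav", audio, sr, subtype="PCM_16")
+```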
diff --git a/docker-compose.mac.yml b/docker-compose.mac.yml
new file mode 100644
index 000000000..94472795d
--- /dev/null
+++ b/docker-compose.mac.yml
@@ -0,0 +1,18 @@
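+# Start the stack with: docker compose -f docker-compose.mac.yml up --build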
+version: '3.8'
+
+services:
+ rvc-webui:
+ build:
+ context: .
+ dockerfile: Dockerfile.mac
+ ports:
+ - "7865:7865"
+ volumes:
+ - ./weights:/app/weights
+ - ./logs:/app/logs
+ - ./assets/weights:/app/assets/weights
+ - ./datasets:/app/datasets
+ environment:
+ - PYTHONUNBUFFERED=1
+ restart: unless-stopped
+ platform: linux/amd64
diff --git a/experiments/output_test.wav b/experiments/output_test.wav
new file mode 100644
index 000000000..468ab13b6
Binary files /dev/null and b/experiments/output_test.wav differ
diff --git a/experiments/voice1_to_voice2.wav b/experiments/voice1_to_voice2.wav
new file mode 100644
index 000000000..8095fa0fb
Binary files /dev/null and b/experiments/voice1_to_voice2.wav differ
diff --git a/infer-web.py b/infer-web.py
index 47596d539..eade72a28 100644
--- a/infer-web.py
+++ b/infer-web.py
@@ -114,6 +114,11 @@ def forward_dml(ctx, x, scale):
if if_gpu_ok and len(gpu_infos) > 0:
gpu_info = "\n".join(gpu_infos)
default_batch_size = min(mem) // 2
+elif torch.backends.mps.is_available():
+ if_gpu_ok = True
+ gpu_infos.append("0\tApple Silicon MPS")
+ gpu_info = "Apple Silicon MPS detected"
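+    # Conservative fixed default; unlike the CUDA path above, this branch does not query device memory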
+ default_batch_size = 4
else:
gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
default_batch_size = 1
@@ -220,6 +225,14 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
f.close()
+
+ # Verify trainset_dir exists
+ if not os.path.exists(trainset_dir):
+ error_msg = f"Training folder does not exist: {trainset_dir}"
+ logger.error(error_msg)
+ yield error_msg
+ return
+
cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % (
config.python_cmd,
trainset_dir,
@@ -231,8 +244,19 @@ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
config.preprocess_per,
)
logger.info("Execute: " + cmd)
- # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
- p = Popen(cmd, shell=True)
+ print(f"Starting preprocessing: {cmd}")
+ # Use shell=False with proper argument list for better reliability
+ cmd_args = [
+ config.python_cmd,
+ "infer/modules/train/preprocess.py",
+ trainset_dir,
+ str(sr),
+ str(n_p),
+ f"{now_dir}/logs/{exp_dir}",
+ str(config.noparallel),
+ str(config.preprocess_per),
+ ]
+ p = Popen(cmd_args, cwd=now_dir)
# 煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
done = [False]
threading.Thread(
diff --git a/infer-web.pyi b/infer-web.pyi
new file mode 100644
index 000000000..9b6cfe35f
--- /dev/null
+++ b/infer-web.pyi
@@ -0,0 +1,1625 @@
+import os
+import sys
+from dotenv import load_dotenv
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+load_dotenv()
+from infer.modules.vc.modules import VC
+from infer.modules.uvr5.modules import uvr
+from infer.lib.train.process_ckpt import (
+ change_info,
+ extract_small_model,
+ merge,
+ show_info,
+)
+from i18n.i18n import I18nAuto
+from configs.config import Config
+from sklearn.cluster import MiniBatchKMeans
+import torch, platform
+import numpy as np
+import gradio as gr
+import faiss
+import fairseq
+import pathlib
+import json
+from time import sleep
+from subprocess import Popen
+from random import shuffle
+import warnings
+import traceback
+import threading
+import shutil
+import logging
+
+
+logging.getLogger("numba").setLevel(logging.WARNING)
+logging.getLogger("httpx").setLevel(logging.WARNING)
+
+logger = logging.getLogger(__name__)
+
+tmp = os.path.join(now_dir, "TEMP")
+shutil.rmtree(tmp, ignore_errors=True)
+shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
+shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True)
+os.makedirs(tmp, exist_ok=True)
+os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
+os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)
+os.environ["TEMP"] = tmp
+warnings.filterwarnings("ignore")
+torch.manual_seed(114514)
+
+
+config = Config()
+vc = VC(config)
+
+
+if config.dml == True:
+
+ def forward_dml(ctx, x, scale):
+ ctx.scale = scale
+ res = x.clone().detach()
+ return res
+
+ fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
+i18n = I18nAuto()
+logger.info(i18n)
+# 判断是否有能用来训练和加速推理的N卡
+ngpu = torch.cuda.device_count()
+gpu_infos = []
+mem = []
+if_gpu_ok = False
+
+if torch.cuda.is_available() or ngpu != 0:
+ for i in range(ngpu):
+ gpu_name = torch.cuda.get_device_name(i)
+ if any(
+ value in gpu_name.upper()
+ for value in [
+ "10",
+ "16",
+ "20",
+ "30",
+ "40",
+ "A2",
+ "A3",
+ "A4",
+ "P4",
+ "A50",
+ "500",
+ "A60",
+ "70",
+ "80",
+ "90",
+ "M4",
+ "T4",
+ "TITAN",
+ "4060",
+ "L",
+ "6000",
+ ]
+ ):
+ # A10#A100#V100#A40#P40#M40#K80#A4500
+ if_gpu_ok = True # 至少有一张能用的N卡
+ gpu_infos.append("%s\t%s" % (i, gpu_name))
+ mem.append(
+ int(
+ torch.cuda.get_device_properties(i).total_memory
+ / 1024
+ / 1024
+ / 1024
+ + 0.4
+ )
+ )
+if if_gpu_ok and len(gpu_infos) > 0:
+ gpu_info = "\n".join(gpu_infos)
+ default_batch_size = min(mem) // 2
+else:
+ gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
+ default_batch_size = 1
+gpus = "-".join([i[0] for i in gpu_infos])
+
+from gradio.events import Dependency
+
+class ToolButton(gr.Button, gr.components.FormComponent):
+ """Small button with single emoji as text, fits inside gradio forms"""
+
+ def __init__(self, **kwargs):
+ super().__init__(variant="tool", **kwargs)
+
+ def get_block_name(self):
+ return "button"
+ from typing import Callable, Literal, Sequence, Any, TYPE_CHECKING
+ from gradio.blocks import Block
+ if TYPE_CHECKING:
+ from gradio.components import Timer
+ from gradio.components.base import Component
+
+
+weight_root = os.getenv("weight_root")
+weight_uvr5_root = os.getenv("weight_uvr5_root")
+index_root = os.getenv("index_root")
+outside_index_root = os.getenv("outside_index_root")
+
+names = []
+for name in os.listdir(weight_root):
+ if name.endswith(".pth"):
+ names.append(name)
+index_paths = []
+
+
+def lookup_indices(index_root):
+ global index_paths
+ for root, dirs, files in os.walk(index_root, topdown=False):
+ for name in files:
+ if name.endswith(".index") and "trained" not in name:
+ index_paths.append("%s/%s" % (root, name))
+
+
+lookup_indices(index_root)
+lookup_indices(outside_index_root)
+uvr5_names = []
+for name in os.listdir(weight_uvr5_root):
+ if name.endswith(".pth") or "onnx" in name:
+ uvr5_names.append(name.replace(".pth", ""))
+
+
+def change_choices():
+ names = []
+ for name in os.listdir(weight_root):
+ if name.endswith(".pth"):
+ names.append(name)
+ index_paths = []
+ for root, dirs, files in os.walk(index_root, topdown=False):
+ for name in files:
+ if name.endswith(".index") and "trained" not in name:
+ index_paths.append("%s/%s" % (root, name))
+ return {"choices": sorted(names), "__type__": "update"}, {
+ "choices": sorted(index_paths),
+ "__type__": "update",
+ }
+
+
+def clean():
+ return {"value": "", "__type__": "update"}
+
+
+def export_onnx(ModelPath, ExportedPath):
+ from infer.modules.onnx.export import export_onnx as eo
+
+ eo(ModelPath, ExportedPath)
+
+
+sr_dict = {
+ "32k": 32000,
+ "40k": 40000,
+ "48k": 48000,
+}
+
+
+def if_done(done, p):
+ while 1:
+ if p.poll() is None:
+ sleep(0.5)
+ else:
+ break
+ done[0] = True
+
+
+def if_done_multi(done, ps):
+ while 1:
+ # poll==None代表进程未结束
+ # 只要有一个进程未结束都不停
+ flag = 1
+ for p in ps:
+ if p.poll() is None:
+ flag = 0
+ sleep(0.5)
+ break
+ if flag == 1:
+ break
+ done[0] = True
+
+
+def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
+ sr = sr_dict[sr]
+ os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
+ f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
+ f.close()
+ cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % (
+ config.python_cmd,
+ trainset_dir,
+ sr,
+ n_p,
+ now_dir,
+ exp_dir,
+ config.noparallel,
+ config.preprocess_per,
+ )
+ logger.info("Execute: " + cmd)
+ # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir
+ p = Popen(cmd, shell=True)
+ # 煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
+ done = [False]
+ threading.Thread(
+ target=if_done,
+ args=(
+ done,
+ p,
+ ),
+ ).start()
+ while 1:
+ with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
+ yield (f.read())
+ sleep(1)
+ if done[0]:
+ break
+ with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
+ log = f.read()
+ logger.info(log)
+ yield log
+
+
+# but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2])
+def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvpe):
+ gpus = gpus.split("-")
+ os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
+ f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
+ f.close()
+ if if_f0:
+ if f0method != "rmvpe_gpu":
+ cmd = (
+ '"%s" infer/modules/train/extract/extract_f0_print.py "%s/logs/%s" %s %s'
+ % (
+ config.python_cmd,
+ now_dir,
+ exp_dir,
+ n_p,
+ f0method,
+ )
+ )
+ logger.info("Execute: " + cmd)
+ p = Popen(
+ cmd, shell=True, cwd=now_dir
+ ) # , stdin=PIPE, stdout=PIPE,stderr=PIPE
+ # 煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
+ done = [False]
+ threading.Thread(
+ target=if_done,
+ args=(
+ done,
+ p,
+ ),
+ ).start()
+ else:
+ if gpus_rmvpe != "-":
+ gpus_rmvpe = gpus_rmvpe.split("-")
+ leng = len(gpus_rmvpe)
+ ps = []
+ for idx, n_g in enumerate(gpus_rmvpe):
+ cmd = (
+ '"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s '
+ % (
+ config.python_cmd,
+ leng,
+ idx,
+ n_g,
+ now_dir,
+ exp_dir,
+ config.is_half,
+ )
+ )
+ logger.info("Execute: " + cmd)
+ p = Popen(
+ cmd, shell=True, cwd=now_dir
+ ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+ ps.append(p)
+ # 煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
+ done = [False]
+ threading.Thread(
+ target=if_done_multi, #
+ args=(
+ done,
+ ps,
+ ),
+ ).start()
+ else:
+ cmd = (
+ config.python_cmd
+ + ' infer/modules/train/extract/extract_f0_rmvpe_dml.py "%s/logs/%s" '
+ % (
+ now_dir,
+ exp_dir,
+ )
+ )
+ logger.info("Execute: " + cmd)
+ p = Popen(
+ cmd, shell=True, cwd=now_dir
+ ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+ p.wait()
+ done = [True]
+ while 1:
+ with open(
+ "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r"
+ ) as f:
+ yield (f.read())
+ sleep(1)
+ if done[0]:
+ break
+ with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+ log = f.read()
+ logger.info(log)
+ yield log
+ # 对不同part分别开多进程
+ """
+ n_part=int(sys.argv[1])
+ i_part=int(sys.argv[2])
+ i_gpu=sys.argv[3]
+ exp_dir=sys.argv[4]
+ os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
+ """
+ leng = len(gpus)
+ ps = []
+ for idx, n_g in enumerate(gpus):
+ cmd = (
+ '"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s %s'
+ % (
+ config.python_cmd,
+ config.device,
+ leng,
+ idx,
+ n_g,
+ now_dir,
+ exp_dir,
+ version19,
+ config.is_half,
+ )
+ )
+ logger.info("Execute: " + cmd)
+ p = Popen(
+ cmd, shell=True, cwd=now_dir
+ ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+ ps.append(p)
+ # 煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读
+ done = [False]
+ threading.Thread(
+ target=if_done_multi,
+ args=(
+ done,
+ ps,
+ ),
+ ).start()
+ while 1:
+ with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+ yield (f.read())
+ sleep(1)
+ if done[0]:
+ break
+ with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+ log = f.read()
+ logger.info(log)
+ yield log
+
+
+def get_pretrained_models(path_str, f0_str, sr2):
+ if_pretrained_generator_exist = os.access(
+ "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK
+ )
+ if_pretrained_discriminator_exist = os.access(
+ "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK
+ )
+ if not if_pretrained_generator_exist:
+ logger.warning(
+ "assets/pretrained%s/%sG%s.pth not exist, will not use pretrained model",
+ path_str,
+ f0_str,
+ sr2,
+ )
+ if not if_pretrained_discriminator_exist:
+ logger.warning(
+ "assets/pretrained%s/%sD%s.pth not exist, will not use pretrained model",
+ path_str,
+ f0_str,
+ sr2,
+ )
+ return (
+ (
+ "assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
+ if if_pretrained_generator_exist
+ else ""
+ ),
+ (
+ "assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
+ if if_pretrained_discriminator_exist
+ else ""
+ ),
+ )
+
+
+def change_sr2(sr2, if_f0_3, version19):
+ path_str = "" if version19 == "v1" else "_v2"
+ f0_str = "f0" if if_f0_3 else ""
+ return get_pretrained_models(path_str, f0_str, sr2)
+
+
+def change_version19(sr2, if_f0_3, version19):
+ path_str = "" if version19 == "v1" else "_v2"
+ if sr2 == "32k" and version19 == "v1":
+ sr2 = "40k"
+ to_return_sr2 = (
+ {"choices": ["40k", "48k"], "__type__": "update", "value": sr2}
+ if version19 == "v1"
+ else {"choices": ["40k", "48k", "32k"], "__type__": "update", "value": sr2}
+ )
+ f0_str = "f0" if if_f0_3 else ""
+ return (
+ *get_pretrained_models(path_str, f0_str, sr2),
+ to_return_sr2,
+ )
+
+
+def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15
+ path_str = "" if version19 == "v1" else "_v2"
+ return (
+ {"visible": if_f0_3, "__type__": "update"},
+ {"visible": if_f0_3, "__type__": "update"},
+ *get_pretrained_models(path_str, "f0" if if_f0_3 == True else "", sr2),
+ )
+
+
+# but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16])
+def click_train(
+ exp_dir1,
+ sr2,
+ if_f0_3,
+ spk_id5,
+ save_epoch10,
+ total_epoch11,
+ batch_size12,
+ if_save_latest13,
+ pretrained_G14,
+ pretrained_D15,
+ gpus16,
+ if_cache_gpu17,
+ if_save_every_weights18,
+ version19,
+):
+ # 生成filelist
+ exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+ os.makedirs(exp_dir, exist_ok=True)
+ gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
+ feature_dir = (
+ "%s/3_feature256" % (exp_dir)
+ if version19 == "v1"
+ else "%s/3_feature768" % (exp_dir)
+ )
+ if if_f0_3:
+ f0_dir = "%s/2a_f0" % (exp_dir)
+ f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
+ names = (
+ set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
+ & set([name.split(".")[0] for name in os.listdir(feature_dir)])
+ & set([name.split(".")[0] for name in os.listdir(f0_dir)])
+ & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
+ )
+ else:
+ names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
+ [name.split(".")[0] for name in os.listdir(feature_dir)]
+ )
+ opt = []
+ for name in names:
+ if if_f0_3:
+ opt.append(
+ "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
+ % (
+ gt_wavs_dir.replace("\\", "\\\\"),
+ name,
+ feature_dir.replace("\\", "\\\\"),
+ name,
+ f0_dir.replace("\\", "\\\\"),
+ name,
+ f0nsf_dir.replace("\\", "\\\\"),
+ name,
+ spk_id5,
+ )
+ )
+ else:
+ opt.append(
+ "%s/%s.wav|%s/%s.npy|%s"
+ % (
+ gt_wavs_dir.replace("\\", "\\\\"),
+ name,
+ feature_dir.replace("\\", "\\\\"),
+ name,
+ spk_id5,
+ )
+ )
+ fea_dim = 256 if version19 == "v1" else 768
+ if if_f0_3:
+ for _ in range(2):
+ opt.append(
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
+ % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
+ )
+ else:
+ for _ in range(2):
+ opt.append(
+ "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
+ % (now_dir, sr2, now_dir, fea_dim, spk_id5)
+ )
+ shuffle(opt)
+ with open("%s/filelist.txt" % exp_dir, "w") as f:
+ f.write("\n".join(opt))
+ logger.debug("Write filelist done")
+ # 生成config#无需生成config
+ # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
+ logger.info("Use gpus: %s", str(gpus16))
+ if pretrained_G14 == "":
+ logger.info("No pretrained Generator")
+ if pretrained_D15 == "":
+ logger.info("No pretrained Discriminator")
+ if version19 == "v1" or sr2 == "40k":
+ config_path = "v1/%s.json" % sr2
+ else:
+ config_path = "v2/%s.json" % sr2
+ config_save_path = os.path.join(exp_dir, "config.json")
+ if not pathlib.Path(config_save_path).exists():
+ with open(config_save_path, "w", encoding="utf-8") as f:
+ json.dump(
+ config.json_config[config_path],
+ f,
+ ensure_ascii=False,
+ indent=4,
+ sort_keys=True,
+ )
+ f.write("\n")
+ if gpus16:
+ cmd = (
+ '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
+ % (
+ config.python_cmd,
+ exp_dir1,
+ sr2,
+ 1 if if_f0_3 else 0,
+ batch_size12,
+ gpus16,
+ total_epoch11,
+ save_epoch10,
+ "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
+ "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
+ 1 if if_save_latest13 == i18n("是") else 0,
+ 1 if if_cache_gpu17 == i18n("是") else 0,
+ 1 if if_save_every_weights18 == i18n("是") else 0,
+ version19,
+ )
+ )
+ else:
+ cmd = (
+ '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s'
+ % (
+ config.python_cmd,
+ exp_dir1,
+ sr2,
+ 1 if if_f0_3 else 0,
+ batch_size12,
+ total_epoch11,
+ save_epoch10,
+ "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "",
+ "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "",
+ 1 if if_save_latest13 == i18n("是") else 0,
+ 1 if if_cache_gpu17 == i18n("是") else 0,
+ 1 if if_save_every_weights18 == i18n("是") else 0,
+ version19,
+ )
+ )
+ logger.info("Execute: " + cmd)
+ p = Popen(cmd, shell=True, cwd=now_dir)
+ p.wait()
+ return "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"
+
+
+# but4.click(train_index, [exp_dir1], info3)
+def train_index(exp_dir1, version19):
+ # exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+ exp_dir = "logs/%s" % (exp_dir1)
+ os.makedirs(exp_dir, exist_ok=True)
+ feature_dir = (
+ "%s/3_feature256" % (exp_dir)
+ if version19 == "v1"
+ else "%s/3_feature768" % (exp_dir)
+ )
+ if not os.path.exists(feature_dir):
+ return "请先进行特征提取!"
+ listdir_res = list(os.listdir(feature_dir))
+ if len(listdir_res) == 0:
+ return "请先进行特征提取!"
+ infos = []
+ npys = []
+ for name in sorted(listdir_res):
+ phone = np.load("%s/%s" % (feature_dir, name))
+ npys.append(phone)
+ big_npy = np.concatenate(npys, 0)
+ big_npy_idx = np.arange(big_npy.shape[0])
+ np.random.shuffle(big_npy_idx)
+ big_npy = big_npy[big_npy_idx]
+ if big_npy.shape[0] > 2e5:
+ infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
+ yield "\n".join(infos)
+ try:
+ big_npy = (
+ MiniBatchKMeans(
+ n_clusters=10000,
+ verbose=True,
+ batch_size=256 * config.n_cpu,
+ compute_labels=False,
+ init="random",
+ )
+ .fit(big_npy)
+ .cluster_centers_
+ )
+ except:
+ info = traceback.format_exc()
+ logger.info(info)
+ infos.append(info)
+ yield "\n".join(infos)
+
+ np.save("%s/total_fea.npy" % exp_dir, big_npy)
+ n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
+ infos.append("%s,%s" % (big_npy.shape, n_ivf))
+ yield "\n".join(infos)
+ index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
+ # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
+ infos.append("training")
+ yield "\n".join(infos)
+ index_ivf = faiss.extract_index_ivf(index) #
+ index_ivf.nprobe = 1
+ index.train(big_npy)
+ faiss.write_index(
+ index,
+ "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
+ % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+ )
+ infos.append("adding")
+ yield "\n".join(infos)
+ batch_size_add = 8192
+ for i in range(0, big_npy.shape[0], batch_size_add):
+ index.add(big_npy[i : i + batch_size_add])
+ faiss.write_index(
+ index,
+ "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+ % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+ )
+ infos.append(
+ "成功构建索引 added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+ % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
+ )
+ try:
+ link = os.link if platform.system() == "Windows" else os.symlink
+ link(
+ "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+ % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+ "%s/%s_IVF%s_Flat_nprobe_%s_%s_%s.index"
+ % (
+ outside_index_root,
+ exp_dir1,
+ n_ivf,
+ index_ivf.nprobe,
+ exp_dir1,
+ version19,
+ ),
+ )
+ infos.append("链接索引到外部-%s" % (outside_index_root))
+ except:
+ infos.append("链接索引到外部-%s失败" % (outside_index_root))
+
+ # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
+ # infos.append("成功构建索引,added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19))
+ yield "\n".join(infos)
+
+
+# but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3)
+def train1key(
+ exp_dir1,
+ sr2,
+ if_f0_3,
+ trainset_dir4,
+ spk_id5,
+ np7,
+ f0method8,
+ save_epoch10,
+ total_epoch11,
+ batch_size12,
+ if_save_latest13,
+ pretrained_G14,
+ pretrained_D15,
+ gpus16,
+ if_cache_gpu17,
+ if_save_every_weights18,
+ version19,
+ gpus_rmvpe,
+):
+ infos = []
+
+ def get_info_str(strr):
+ infos.append(strr)
+ return "\n".join(infos)
+
+ # step1:处理数据
+ yield get_info_str(i18n("step1:正在处理数据"))
+ [get_info_str(_) for _ in preprocess_dataset(trainset_dir4, exp_dir1, sr2, np7)]
+
+ # step2a:提取音高
+ yield get_info_str(i18n("step2:正在提取音高&正在提取特征"))
+ [
+ get_info_str(_)
+ for _ in extract_f0_feature(
+ gpus16, np7, f0method8, if_f0_3, exp_dir1, version19, gpus_rmvpe
+ )
+ ]
+
+ # step3a:训练模型
+ yield get_info_str(i18n("step3a:正在训练模型"))
+ click_train(
+ exp_dir1,
+ sr2,
+ if_f0_3,
+ spk_id5,
+ save_epoch10,
+ total_epoch11,
+ batch_size12,
+ if_save_latest13,
+ pretrained_G14,
+ pretrained_D15,
+ gpus16,
+ if_cache_gpu17,
+ if_save_every_weights18,
+ version19,
+ )
+ yield get_info_str(
+ i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log")
+ )
+
+ # step3b:训练索引
+ [get_info_str(_) for _ in train_index(exp_dir1, version19)]
+ yield get_info_str(i18n("全流程结束!"))
+
+
+# ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__])
+def change_info_(ckpt_path):
+ if not os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")):
+ return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
+ try:
+ with open(
+ ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r"
+ ) as f:
+ info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1])
+ sr, f0 = info["sample_rate"], info["if_f0"]
+ version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
+ return sr, str(f0), version
+ except:
+ traceback.print_exc()
+ return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
+
+
+F0GPUVisible = config.dml == False
+
+
+def change_f0_method(f0method8):
+ if f0method8 == "rmvpe_gpu":
+ visible = F0GPUVisible
+ else:
+ visible = False
+ return {"visible": visible, "__type__": "update"}
+
+
+with gr.Blocks(title="RVC WebUI") as app:
+ gr.Markdown("## RVC WebUI")
+ gr.Markdown(
+ value=i18n(
+            "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责. 如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE."
+ )
+ )
+ with gr.Tabs():
+ with gr.TabItem(i18n("模型推理")):
+ with gr.Row():
+ sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names))
+ with gr.Column():
+ refresh_button = gr.Button(
+ i18n("刷新音色列表和索引路径"), variant="primary"
+ )
+ clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
+ spk_item = gr.Slider(
+ minimum=0,
+ maximum=2333,
+ step=1,
+ label=i18n("请选择说话人id"),
+ value=0,
+ visible=False,
+ interactive=True,
+ )
+ clean_button.click(
+ fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean"
+ )
+ with gr.TabItem(i18n("单次推理")):
+ with gr.Group():
+ with gr.Row():
+ with gr.Column():
+ vc_transform0 = gr.Number(
+ label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
+ value=0,
+ )
+ input_audio0 = gr.Textbox(
+ label=i18n(
+ "输入待处理音频文件路径(默认是正确格式示例)"
+ ),
+ placeholder="C:\\Users\\Desktop\\audio_example.wav",
+ )
+ file_index1 = gr.Textbox(
+ label=i18n(
+ "特征检索库文件路径,为空则使用下拉的选择结果"
+ ),
+ placeholder="C:\\Users\\Desktop\\model_example.index",
+ interactive=True,
+ )
+ file_index2 = gr.Dropdown(
+ label=i18n("自动检测index路径,下拉式选择(dropdown)"),
+ choices=sorted(index_paths),
+ interactive=True,
+ )
+ f0method0 = gr.Radio(
+ label=i18n(
+ "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
+ ),
+ choices=(
+ ["pm", "harvest", "crepe", "rmvpe"]
+ if config.dml == False
+ else ["pm", "harvest", "rmvpe"]
+ ),
+ value="rmvpe",
+ interactive=True,
+ )
+
+ with gr.Column():
+ resample_sr0 = gr.Slider(
+ minimum=0,
+ maximum=48000,
+ label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
+ value=0,
+ step=1,
+ interactive=True,
+ )
+ rms_mix_rate0 = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n(
+ "输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"
+ ),
+ value=0.25,
+ interactive=True,
+ )
+ protect0 = gr.Slider(
+ minimum=0,
+ maximum=0.5,
+ label=i18n(
+ "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
+ ),
+ value=0.33,
+ step=0.01,
+ interactive=True,
+ )
+ filter_radius0 = gr.Slider(
+ minimum=0,
+ maximum=7,
+ label=i18n(
+ ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"
+ ),
+ value=3,
+ step=1,
+ interactive=True,
+ )
+ index_rate1 = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("检索特征占比"),
+ value=0.75,
+ interactive=True,
+ )
+ f0_file = gr.File(
+ label=i18n(
+ "F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"
+ ),
+ visible=False,
+ )
+
+ refresh_button.click(
+ fn=change_choices,
+ inputs=[],
+ outputs=[sid0, file_index2],
+ api_name="infer_refresh",
+ )
+ # file_big_npy1 = gr.Textbox(
+ # label=i18n("特征文件路径"),
+ # value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
+ # interactive=True,
+ # )
+ with gr.Group():
+ with gr.Column():
+ but0 = gr.Button(i18n("转换"), variant="primary")
+ with gr.Row():
+ vc_output1 = gr.Textbox(label=i18n("输出信息"))
+ vc_output2 = gr.Audio(
+ label=i18n("输出音频(右下角三个点,点了可以下载)")
+ )
+
+ but0.click(
+ vc.vc_single,
+ [
+ spk_item,
+ input_audio0,
+ vc_transform0,
+ f0_file,
+ f0method0,
+ file_index1,
+ file_index2,
+ # file_big_npy1,
+ index_rate1,
+ filter_radius0,
+ resample_sr0,
+ rms_mix_rate0,
+ protect0,
+ ],
+ [vc_output1, vc_output2],
+ api_name="infer_convert",
+ )
+ with gr.TabItem(i18n("批量推理")):
+ gr.Markdown(
+ value=i18n(
+ "批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. "
+ )
+ )
+ with gr.Row():
+ with gr.Column():
+ vc_transform1 = gr.Number(
+ label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
+ value=0,
+ )
+ opt_input = gr.Textbox(
+ label=i18n("指定输出文件夹"), value="opt"
+ )
+ file_index3 = gr.Textbox(
+ label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
+ value="",
+ interactive=True,
+ )
+ file_index4 = gr.Dropdown(
+ label=i18n("自动检测index路径,下拉式选择(dropdown)"),
+ choices=sorted(index_paths),
+ interactive=True,
+ )
+ f0method1 = gr.Radio(
+ label=i18n(
+ "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
+ ),
+ choices=(
+ ["pm", "harvest", "crepe", "rmvpe"]
+ if config.dml == False
+ else ["pm", "harvest", "rmvpe"]
+ ),
+ value="rmvpe",
+ interactive=True,
+ )
+ format1 = gr.Radio(
+ label=i18n("导出文件格式"),
+ choices=["wav", "flac", "mp3", "m4a"],
+ value="wav",
+ interactive=True,
+ )
+
+ refresh_button.click(
+ fn=lambda: change_choices()[1],
+ inputs=[],
+ outputs=file_index4,
+ api_name="infer_refresh_batch",
+ )
+ # file_big_npy2 = gr.Textbox(
+ # label=i18n("特征文件路径"),
+ # value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
+ # interactive=True,
+ # )
+
+ with gr.Column():
+ resample_sr1 = gr.Slider(
+ minimum=0,
+ maximum=48000,
+ label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
+ value=0,
+ step=1,
+ interactive=True,
+ )
+ rms_mix_rate1 = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n(
+ "输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"
+ ),
+ value=1,
+ interactive=True,
+ )
+ protect1 = gr.Slider(
+ minimum=0,
+ maximum=0.5,
+ label=i18n(
+ "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
+ ),
+ value=0.33,
+ step=0.01,
+ interactive=True,
+ )
+ filter_radius1 = gr.Slider(
+ minimum=0,
+ maximum=7,
+ label=i18n(
+ ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"
+ ),
+ value=3,
+ step=1,
+ interactive=True,
+ )
+ index_rate2 = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("检索特征占比"),
+ value=1,
+ interactive=True,
+ )
+ with gr.Row():
+ dir_input = gr.Textbox(
+ label=i18n(
+ "输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"
+ ),
+ placeholder="C:\\Users\\Desktop\\input_vocal_dir",
+ )
+ inputs = gr.File(
+ file_count="multiple",
+ label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),
+ )
+
+ with gr.Row():
+ but1 = gr.Button(i18n("转换"), variant="primary")
+ vc_output3 = gr.Textbox(label=i18n("输出信息"))
+
+ but1.click(
+ vc.vc_multi,
+ [
+ spk_item,
+ dir_input,
+ opt_input,
+ inputs,
+ vc_transform1,
+ f0method1,
+ file_index3,
+ file_index4,
+ # file_big_npy2,
+ index_rate2,
+ filter_radius1,
+ resample_sr1,
+ rms_mix_rate1,
+ protect1,
+ format1,
+ ],
+ [vc_output3],
+ api_name="infer_convert_batch",
+ )
+ sid0.change(
+ fn=vc.get_vc,
+ inputs=[sid0, protect0, protect1],
+ outputs=[spk_item, protect0, protect1, file_index2, file_index4],
+ api_name="infer_change_voice",
+ )
+ with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")):
+ with gr.Group():
+ gr.Markdown(
+ value=i18n(
+                        "人声伴奏分离批量处理, 使用UVR5模型。 合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。 模型分为三类: 1、保留人声:不带和声的音频选这个,对主人声保留比HP5更好。内置HP2和HP3两个模型,HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点; 2、仅保留主人声:带和声的音频选这个,对主人声可能有削弱。内置HP5一个模型; 3、去混响、去延迟模型(by FoxJoy): (1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响; (234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底,DeReverb额外去除混响,可去除单声道混响,但是对高频重的板式混响去不干净。 去混响/去延迟,附: 1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍; 2、MDX-Net-Dereverb模型挺慢的; 3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。"
+ )
+ )
+ with gr.Row():
+ with gr.Column():
+ dir_wav_input = gr.Textbox(
+ label=i18n("输入待处理音频文件夹路径"),
+ placeholder="C:\\Users\\Desktop\\todo-songs",
+ )
+ wav_inputs = gr.File(
+ file_count="multiple",
+ label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),
+ )
+ with gr.Column():
+ model_choose = gr.Dropdown(
+ label=i18n("模型"), choices=uvr5_names
+ )
+ agg = gr.Slider(
+ minimum=0,
+ maximum=20,
+ step=1,
+ label="人声提取激进程度",
+ value=10,
+ interactive=True,
+ visible=False, # 先不开放调整
+ )
+ opt_vocal_root = gr.Textbox(
+ label=i18n("指定输出主人声文件夹"), value="opt"
+ )
+ opt_ins_root = gr.Textbox(
+ label=i18n("指定输出非主人声文件夹"), value="opt"
+ )
+ format0 = gr.Radio(
+ label=i18n("导出文件格式"),
+ choices=["wav", "flac", "mp3", "m4a"],
+ value="flac",
+ interactive=True,
+ )
+ but2 = gr.Button(i18n("转换"), variant="primary")
+ vc_output4 = gr.Textbox(label=i18n("输出信息"))
+ but2.click(
+ uvr,
+ [
+ model_choose,
+ dir_wav_input,
+ opt_vocal_root,
+ wav_inputs,
+ opt_ins_root,
+ agg,
+ format0,
+ ],
+ [vc_output4],
+ api_name="uvr_convert",
+ )
+ with gr.TabItem(i18n("训练")):
+ gr.Markdown(
+ value=i18n(
+ "step1: 填写实验配置. 实验数据放在logs下, 每个实验一个文件夹, 需手工输入实验名路径, 内含实验配置, 日志, 训练得到的模型文件. "
+ )
+ )
+ with gr.Row():
+ exp_dir1 = gr.Textbox(label=i18n("输入实验名"), value="mi-test")
+ sr2 = gr.Radio(
+ label=i18n("目标采样率"),
+ choices=["40k", "48k"],
+ value="40k",
+ interactive=True,
+ )
+ if_f0_3 = gr.Radio(
+ label=i18n("模型是否带音高指导(唱歌一定要, 语音可以不要)"),
+ choices=[True, False],
+ value=True,
+ interactive=True,
+ )
+ version19 = gr.Radio(
+ label=i18n("版本"),
+ choices=["v1", "v2"],
+ value="v2",
+ interactive=True,
+ visible=True,
+ )
+ np7 = gr.Slider(
+ minimum=0,
+ maximum=config.n_cpu,
+ step=1,
+ label=i18n("提取音高和处理数据使用的CPU进程数"),
+ value=int(np.ceil(config.n_cpu / 1.5)),
+ interactive=True,
+ )
+ with gr.Group(): # 暂时单人的, 后面支持最多4人的#数据处理
+ gr.Markdown(
+ value=i18n(
+ "step2a: 自动遍历训练文件夹下所有可解码成音频的文件并进行切片归一化, 在实验目录下生成2个wav文件夹; 暂时只支持单人训练. "
+ )
+ )
+ with gr.Row():
+ trainset_dir4 = gr.Textbox(
+ label=i18n("输入训练文件夹路径"),
+ value=i18n("E:\\语音音频+标注\\米津玄师\\src"),
+ )
+ spk_id5 = gr.Slider(
+ minimum=0,
+ maximum=4,
+ step=1,
+ label=i18n("请指定说话人id"),
+ value=0,
+ interactive=True,
+ )
+ but1 = gr.Button(i18n("处理数据"), variant="primary")
+ info1 = gr.Textbox(label=i18n("输出信息"), value="")
+ but1.click(
+ preprocess_dataset,
+ [trainset_dir4, exp_dir1, sr2, np7],
+ [info1],
+ api_name="train_preprocess",
+ )
+ with gr.Group():
+ gr.Markdown(
+ value=i18n(
+ "step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"
+ )
+ )
+ with gr.Row():
+ with gr.Column():
+ gpus6 = gr.Textbox(
+ label=i18n(
+ "以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"
+ ),
+ value=gpus,
+ interactive=True,
+ visible=F0GPUVisible,
+ )
+ gpu_info9 = gr.Textbox(
+ label=i18n("显卡信息"), value=gpu_info, visible=F0GPUVisible
+ )
+ with gr.Column():
+ f0method8 = gr.Radio(
+ label=i18n(
+ "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢,rmvpe效果最好且微吃CPU/GPU"
+ ),
+ choices=["pm", "harvest", "dio", "rmvpe", "rmvpe_gpu"],
+ value="rmvpe_gpu",
+ interactive=True,
+ )
+ gpus_rmvpe = gr.Textbox(
+ label=i18n(
+ "rmvpe卡号配置:以-分隔输入使用的不同进程卡号,例如0-0-1使用在卡0上跑2个进程并在卡1上跑1个进程"
+ ),
+ value="%s-%s" % (gpus, gpus),
+ interactive=True,
+ visible=F0GPUVisible,
+ )
+ but2 = gr.Button(i18n("特征提取"), variant="primary")
+ info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
+ f0method8.change(
+ fn=change_f0_method,
+ inputs=[f0method8],
+ outputs=[gpus_rmvpe],
+ )
+ but2.click(
+ extract_f0_feature,
+ [
+ gpus6,
+ np7,
+ f0method8,
+ if_f0_3,
+ exp_dir1,
+ version19,
+ gpus_rmvpe,
+ ],
+ [info2],
+ api_name="train_extract_f0_feature",
+ )
+ with gr.Group():
+ gr.Markdown(value=i18n("step3: 填写训练设置, 开始训练模型和索引"))
+ with gr.Row():
+ save_epoch10 = gr.Slider(
+ minimum=1,
+ maximum=50,
+ step=1,
+ label=i18n("保存频率save_every_epoch"),
+ value=5,
+ interactive=True,
+ )
+ total_epoch11 = gr.Slider(
+ minimum=2,
+ maximum=1000,
+ step=1,
+ label=i18n("总训练轮数total_epoch"),
+ value=20,
+ interactive=True,
+ )
+ batch_size12 = gr.Slider(
+ minimum=1,
+ maximum=40,
+ step=1,
+ label=i18n("每张显卡的batch_size"),
+ value=default_batch_size,
+ interactive=True,
+ )
+ if_save_latest13 = gr.Radio(
+ label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"),
+ choices=[i18n("是"), i18n("否")],
+ value=i18n("否"),
+ interactive=True,
+ )
+ if_cache_gpu17 = gr.Radio(
+ label=i18n(
+ "是否缓存所有训练集至显存. 10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速"
+ ),
+ choices=[i18n("是"), i18n("否")],
+ value=i18n("否"),
+ interactive=True,
+ )
+ if_save_every_weights18 = gr.Radio(
+ label=i18n(
+ "是否在每次保存时间点将最终小模型保存至weights文件夹"
+ ),
+ choices=[i18n("是"), i18n("否")],
+ value=i18n("否"),
+ interactive=True,
+ )
+ with gr.Row():
+ pretrained_G14 = gr.Textbox(
+ label=i18n("加载预训练底模G路径"),
+ value="assets/pretrained_v2/f0G40k.pth",
+ interactive=True,
+ )
+ pretrained_D15 = gr.Textbox(
+ label=i18n("加载预训练底模D路径"),
+ value="assets/pretrained_v2/f0D40k.pth",
+ interactive=True,
+ )
+ sr2.change(
+ change_sr2,
+ [sr2, if_f0_3, version19],
+ [pretrained_G14, pretrained_D15],
+ )
+ version19.change(
+ change_version19,
+ [sr2, if_f0_3, version19],
+ [pretrained_G14, pretrained_D15, sr2],
+ )
+ if_f0_3.change(
+ change_f0,
+ [if_f0_3, sr2, version19],
+ [f0method8, gpus_rmvpe, pretrained_G14, pretrained_D15],
+ )
+ gpus16 = gr.Textbox(
+ label=i18n(
+ "以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"
+ ),
+ value=gpus,
+ interactive=True,
+ )
+ but3 = gr.Button(i18n("训练模型"), variant="primary")
+ but4 = gr.Button(i18n("训练特征索引"), variant="primary")
+ but5 = gr.Button(i18n("一键训练"), variant="primary")
+ info3 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=10)
+ but3.click(
+ click_train,
+ [
+ exp_dir1,
+ sr2,
+ if_f0_3,
+ spk_id5,
+ save_epoch10,
+ total_epoch11,
+ batch_size12,
+ if_save_latest13,
+ pretrained_G14,
+ pretrained_D15,
+ gpus16,
+ if_cache_gpu17,
+ if_save_every_weights18,
+ version19,
+ ],
+ info3,
+ api_name="train_start",
+ )
+ but4.click(train_index, [exp_dir1, version19], info3)
+ but5.click(
+ train1key,
+ [
+ exp_dir1,
+ sr2,
+ if_f0_3,
+ trainset_dir4,
+ spk_id5,
+ np7,
+ f0method8,
+ save_epoch10,
+ total_epoch11,
+ batch_size12,
+ if_save_latest13,
+ pretrained_G14,
+ pretrained_D15,
+ gpus16,
+ if_cache_gpu17,
+ if_save_every_weights18,
+ version19,
+ gpus_rmvpe,
+ ],
+ info3,
+ api_name="train_start_all",
+ )
+
+ with gr.TabItem(i18n("ckpt处理")):
+ with gr.Group():
+ gr.Markdown(value=i18n("模型融合, 可用于测试音色融合"))
+ with gr.Row():
+ ckpt_a = gr.Textbox(
+ label=i18n("A模型路径"), value="", interactive=True
+ )
+ ckpt_b = gr.Textbox(
+ label=i18n("B模型路径"), value="", interactive=True
+ )
+ alpha_a = gr.Slider(
+ minimum=0,
+ maximum=1,
+ label=i18n("A模型权重"),
+ value=0.5,
+ interactive=True,
+ )
+ with gr.Row():
+ sr_ = gr.Radio(
+ label=i18n("目标采样率"),
+ choices=["40k", "48k"],
+ value="40k",
+ interactive=True,
+ )
+ if_f0_ = gr.Radio(
+ label=i18n("模型是否带音高指导"),
+ choices=[i18n("是"), i18n("否")],
+ value=i18n("是"),
+ interactive=True,
+ )
+ info__ = gr.Textbox(
+ label=i18n("要置入的模型信息"),
+ value="",
+ max_lines=8,
+ interactive=True,
+ )
+ name_to_save0 = gr.Textbox(
+ label=i18n("保存的模型名不带后缀"),
+ value="",
+ max_lines=1,
+ interactive=True,
+ )
+ version_2 = gr.Radio(
+ label=i18n("模型版本型号"),
+ choices=["v1", "v2"],
+ value="v1",
+ interactive=True,
+ )
+ with gr.Row():
+ but6 = gr.Button(i18n("融合"), variant="primary")
+ info4 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
+ but6.click(
+ merge,
+ [
+ ckpt_a,
+ ckpt_b,
+ alpha_a,
+ sr_,
+ if_f0_,
+ info__,
+ name_to_save0,
+ version_2,
+ ],
+ info4,
+ api_name="ckpt_merge",
+ ) # def merge(path1,path2,alpha1,sr,f0,info):
+ with gr.Group():
+ gr.Markdown(
+ value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)")
+ )
+ with gr.Row():
+ ckpt_path0 = gr.Textbox(
+ label=i18n("模型路径"), value="", interactive=True
+ )
+ info_ = gr.Textbox(
+ label=i18n("要改的模型信息"),
+ value="",
+ max_lines=8,
+ interactive=True,
+ )
+ name_to_save1 = gr.Textbox(
+ label=i18n("保存的文件名, 默认空为和源文件同名"),
+ value="",
+ max_lines=8,
+ interactive=True,
+ )
+ with gr.Row():
+ but7 = gr.Button(i18n("修改"), variant="primary")
+ info5 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
+ but7.click(
+ change_info,
+ [ckpt_path0, info_, name_to_save1],
+ info5,
+ api_name="ckpt_modify",
+ )
+ with gr.Group():
+ gr.Markdown(
+ value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)")
+ )
+ with gr.Row():
+ ckpt_path1 = gr.Textbox(
+ label=i18n("模型路径"), value="", interactive=True
+ )
+ but8 = gr.Button(i18n("查看"), variant="primary")
+ info6 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
+ but8.click(show_info, [ckpt_path1], info6, api_name="ckpt_show")
+ with gr.Group():
+ gr.Markdown(
+ value=i18n(
+ "模型提取(输入logs文件夹下大文件模型路径),适用于训一半不想训了模型没有自动提取保存小文件模型,或者想测试中间模型的情况"
+ )
+ )
+ with gr.Row():
+ ckpt_path2 = gr.Textbox(
+ label=i18n("模型路径"),
+ value="E:\\codes\\py39\\logs\\mi-test_f0_48k\\G_23333.pth",
+ interactive=True,
+ )
+ save_name = gr.Textbox(
+ label=i18n("保存名"), value="", interactive=True
+ )
+ sr__ = gr.Radio(
+ label=i18n("目标采样率"),
+ choices=["32k", "40k", "48k"],
+ value="40k",
+ interactive=True,
+ )
+ if_f0__ = gr.Radio(
+ label=i18n("模型是否带音高指导,1是0否"),
+ choices=["1", "0"],
+ value="1",
+ interactive=True,
+ )
+ version_1 = gr.Radio(
+ label=i18n("模型版本型号"),
+ choices=["v1", "v2"],
+ value="v2",
+ interactive=True,
+ )
+ info___ = gr.Textbox(
+ label=i18n("要置入的模型信息"),
+ value="",
+ max_lines=8,
+ interactive=True,
+ )
+ but9 = gr.Button(i18n("提取"), variant="primary")
+ info7 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
+ ckpt_path2.change(
+ change_info_, [ckpt_path2], [sr__, if_f0__, version_1]
+ )
+ but9.click(
+ extract_small_model,
+ [ckpt_path2, save_name, sr__, if_f0__, info___, version_1],
+ info7,
+ api_name="ckpt_extract",
+ )
+
+ with gr.TabItem(i18n("Onnx导出")):
+ with gr.Row():
+ ckpt_dir = gr.Textbox(
+ label=i18n("RVC模型路径"), value="", interactive=True
+ )
+ with gr.Row():
+ onnx_dir = gr.Textbox(
+ label=i18n("Onnx输出路径"), value="", interactive=True
+ )
+ with gr.Row():
+ infoOnnx = gr.Label(label="info")
+ with gr.Row():
+ butOnnx = gr.Button(i18n("导出Onnx模型"), variant="primary")
+ butOnnx.click(
+ export_onnx, [ckpt_dir, onnx_dir], infoOnnx, api_name="export_onnx"
+ )
+
+ tab_faq = i18n("常见问题解答")
+ with gr.TabItem(tab_faq):
+ try:
+ if tab_faq == "常见问题解答":
+ with open("docs/cn/faq.md", "r", encoding="utf8") as f:
+ info = f.read()
+ else:
+ with open("docs/en/faq_en.md", "r", encoding="utf8") as f:
+ info = f.read()
+ gr.Markdown(value=info)
+ except:
+ gr.Markdown(traceback.format_exc())
+
+ if config.iscolab:
+ app.queue(concurrency_count=511, max_size=1022).launch(share=True)
+ else:
+ app.queue(concurrency_count=511, max_size=1022).launch(
+ server_name="0.0.0.0",
+ inbrowser=not config.noautoopen,
+ server_port=config.listen_port,
+ quiet=True,
+ )
\ No newline at end of file
diff --git a/infer/lib/train/utils.py b/infer/lib/train/utils.py
index 765c54c61..8184ca004 100644
--- a/infer/lib/train/utils.py
+++ b/infer/lib/train/utils.py
@@ -235,8 +235,9 @@ def plot_spectrogram_to_numpy(spectrogram):
plt.tight_layout()
fig.canvas.draw()
- data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep="")
- data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+ # Fix for newer matplotlib versions
+ buf = fig.canvas.buffer_rgba()
+ data = np.asarray(buf)[:, :, :3]
plt.close()
return data
@@ -266,8 +267,9 @@ def plot_alignment_to_numpy(alignment, info=None):
plt.tight_layout()
fig.canvas.draw()
- data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep="")
- data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+ # Fix for newer matplotlib versions
+ buf = fig.canvas.buffer_rgba()
+ data = np.asarray(buf)[:, :, :3]
plt.close()
return data
diff --git a/infer/modules/train/extract_feature_print.py b/infer/modules/train/extract_feature_print.py
index 96a69dee4..1f9d725b9 100644
--- a/infer/modules/train/extract_feature_print.py
+++ b/infer/modules/train/extract_feature_print.py
@@ -86,6 +86,15 @@ def readwave(wav_path, normalize=False):
% model_path
)
exit(0)
+
+# Fix for PyTorch 2.6+ weights_only default change
+import torch.serialization
+try:
+ import fairseq.data.dictionary
+ torch.serialization.add_safe_globals([fairseq.data.dictionary.Dictionary])
+except:
+ pass
+
models, saved_cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
[model_path],
suffix="",
diff --git a/infer/modules/train/train.py b/infer/modules/train/train.py
index 38a567828..aefdd0349 100644
--- a/infer/modules/train/train.py
+++ b/infer/modules/train/train.py
@@ -18,6 +18,22 @@
import torch
+# Device detection for MPS (Apple Silicon), CUDA, or CPU
+USE_MPS = False
+USE_CUDA = False
+DEVICE = "cpu"
+
+if torch.backends.mps.is_available():
+ USE_MPS = True
+ DEVICE = "mps"
+ print("Using Apple Silicon MPS GPU acceleration")
+elif torch.cuda.is_available():
+ USE_CUDA = True
+ DEVICE = "cuda"
+ print("Using NVIDIA CUDA GPU acceleration")
+else:
+ print("No GPU detected, using CPU")
+
try:
import intel_extension_for_pytorch as ipex # pylint: disable=import-error, unused-import
@@ -79,6 +95,15 @@
global_step = 0
+def to_device(tensor, rank=0):
+ """Move tensor to appropriate device (MPS, CUDA, or CPU)"""
+ if USE_MPS:
+ return tensor.to("mps")
+ elif USE_CUDA:
+ return tensor.cuda(rank, non_blocking=True)
+ return tensor
+
+
class EpochRecorder:
def __init__(self):
self.last_time = ttime()
@@ -167,7 +192,7 @@ def run(rank, n_gpus, hps, logger: logging.Logger):
hps.data.filter_length // 2 + 1,
hps.train.segment_size // hps.data.hop_length,
**hps.model,
- is_half=hps.train.fp16_run,
+ is_half=hps.train.fp16_run and not USE_MPS, # MPS doesn't support fp16 well
sr=hps.sample_rate,
)
else:
@@ -175,12 +200,17 @@ def run(rank, n_gpus, hps, logger: logging.Logger):
hps.data.filter_length // 2 + 1,
hps.train.segment_size // hps.data.hop_length,
**hps.model,
- is_half=hps.train.fp16_run,
+ is_half=hps.train.fp16_run and not USE_MPS,
)
- if torch.cuda.is_available():
+ # Move models to device
+ if USE_MPS:
+ net_g = net_g.to("mps")
+ elif USE_CUDA:
net_g = net_g.cuda(rank)
net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm)
- if torch.cuda.is_available():
+ if USE_MPS:
+ net_d = net_d.to("mps")
+ elif USE_CUDA:
net_d = net_d.cuda(rank)
optim_g = torch.optim.AdamW(
net_g.parameters(),
@@ -194,16 +224,16 @@ def run(rank, n_gpus, hps, logger: logging.Logger):
betas=hps.train.betas,
eps=hps.train.eps,
)
- # net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
- # net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
+ # DDP wrapping (not used for MPS single-GPU)
if hasattr(torch, "xpu") and torch.xpu.is_available():
pass
- elif torch.cuda.is_available():
+ elif USE_CUDA:
net_g = DDP(net_g, device_ids=[rank])
net_d = DDP(net_d, device_ids=[rank])
- else:
+ elif not USE_MPS:
net_g = DDP(net_g)
net_d = DDP(net_d)
+ # MPS: no DDP needed for single GPU
try: # 如果能加载自动resume
_, _, _, epoch_str = utils.load_checkpoint(
@@ -260,7 +290,8 @@ def run(rank, n_gpus, hps, logger: logging.Logger):
optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2
)
- scaler = GradScaler(enabled=hps.train.fp16_run)
+ # MPS doesn't support fp16 GradScaler well
+ scaler = GradScaler(enabled=hps.train.fp16_run and not USE_MPS)
cache = []
for epoch in range(epoch_str, hps.train.epochs + 1):
@@ -341,18 +372,18 @@ def train_and_evaluate(
wave_lengths,
sid,
) = info
- # Load on CUDA
- if torch.cuda.is_available():
- phone = phone.cuda(rank, non_blocking=True)
- phone_lengths = phone_lengths.cuda(rank, non_blocking=True)
+ # Load on GPU (CUDA or MPS)
+ if USE_CUDA or USE_MPS:
+ phone = to_device(phone, rank)
+ phone_lengths = to_device(phone_lengths, rank)
if hps.if_f0 == 1:
- pitch = pitch.cuda(rank, non_blocking=True)
- pitchf = pitchf.cuda(rank, non_blocking=True)
- sid = sid.cuda(rank, non_blocking=True)
- spec = spec.cuda(rank, non_blocking=True)
- spec_lengths = spec_lengths.cuda(rank, non_blocking=True)
- wave = wave.cuda(rank, non_blocking=True)
- wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
+ pitch = to_device(pitch, rank)
+ pitchf = to_device(pitchf, rank)
+ sid = to_device(sid, rank)
+ spec = to_device(spec, rank)
+ spec_lengths = to_device(spec_lengths, rank)
+ wave = to_device(wave, rank)
+ wave_lengths = to_device(wave_lengths, rank)
# Cache on list
if hps.if_f0 == 1:
cache.append(
@@ -412,21 +443,20 @@ def train_and_evaluate(
) = info
else:
phone, phone_lengths, spec, spec_lengths, wave, wave_lengths, sid = info
- ## Load on CUDA
- if (hps.if_cache_data_in_gpu == False) and torch.cuda.is_available():
- phone = phone.cuda(rank, non_blocking=True)
- phone_lengths = phone_lengths.cuda(rank, non_blocking=True)
+ ## Load on GPU (CUDA or MPS)
+ if (hps.if_cache_data_in_gpu == False) and (USE_CUDA or USE_MPS):
+ phone = to_device(phone, rank)
+ phone_lengths = to_device(phone_lengths, rank)
if hps.if_f0 == 1:
- pitch = pitch.cuda(rank, non_blocking=True)
- pitchf = pitchf.cuda(rank, non_blocking=True)
- sid = sid.cuda(rank, non_blocking=True)
- spec = spec.cuda(rank, non_blocking=True)
- spec_lengths = spec_lengths.cuda(rank, non_blocking=True)
- wave = wave.cuda(rank, non_blocking=True)
- # wave_lengths = wave_lengths.cuda(rank, non_blocking=True)
+ pitch = to_device(pitch, rank)
+ pitchf = to_device(pitchf, rank)
+ sid = to_device(sid, rank)
+ spec = to_device(spec, rank)
+ spec_lengths = to_device(spec_lengths, rank)
+ wave = to_device(wave, rank)
# Calculate
- with autocast(enabled=hps.train.fp16_run):
+ with autocast(enabled=hps.train.fp16_run and not USE_MPS):
if hps.if_f0 == 1:
(
y_hat,
diff --git a/infer/modules/vc/utils.py b/infer/modules/vc/utils.py
index c128707cf..1e4afde83 100644
--- a/infer/modules/vc/utils.py
+++ b/infer/modules/vc/utils.py
@@ -1,7 +1,17 @@
import os
+import torch
from fairseq import checkpoint_utils
+# PyTorch 2.6+ compatibility: weights_only=True by default breaks fairseq loading
+# Monkey-patch torch.load to use weights_only=False for model loading
+_original_torch_load = torch.load
+def _patched_torch_load(*args, **kwargs):
+ if 'weights_only' not in kwargs:
+ kwargs['weights_only'] = False
+ return _original_torch_load(*args, **kwargs)
+torch.load = _patched_torch_load
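+# Note: this patch applies to every torch.load call in this process, so only load checkpoints from trusted sources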
+
def get_index_path_from_model(sid):
return next(
diff --git a/inference_log.txt b/inference_log.txt
new file mode 100644
index 000000000..2b53e1c21
--- /dev/null
+++ b/inference_log.txt
@@ -0,0 +1,24 @@
+/opt/anaconda3/envs/rvc/lib/python3.10/site-packages/pyworld/__init__.py:13: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ import pkg_resources
+2025-12-08 01:10:34 | INFO | configs.config | No supported Nvidia GPU found
+2025-12-08 01:10:34 | INFO | configs.config | MPS available but using CPU for stability
+2025-12-08 01:10:34 | INFO | configs.config | overwrite v1/32k.json
+2025-12-08 01:10:34 | INFO | configs.config | overwrite v1/40k.json
+2025-12-08 01:10:34 | INFO | configs.config | overwrite v1/48k.json
+2025-12-08 01:10:34 | INFO | configs.config | overwrite v2/48k.json
+2025-12-08 01:10:34 | INFO | configs.config | overwrite v2/32k.json
+2025-12-08 01:10:34 | INFO | configs.config | overwrite preprocess_per to 3
+2025-12-08 01:10:34 | INFO | configs.config | Use cpu instead
+2025-12-08 01:10:34 | INFO | configs.config | Half-precision floating-point: False, device: cpu
+2025-12-08 01:10:34 | INFO | infer.modules.vc.modules | Get sid: Voice_New.pth
+2025-12-08 01:10:34 | INFO | infer.modules.vc.modules | Loading: assets/weights/Voice_New.pth
+/opt/anaconda3/envs/rvc/lib/python3.10/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.
+ WeightNorm.apply(module, name, dim)
+2025-12-08 01:10:34 | INFO | infer.modules.vc.modules | Select index: logs/Voice_New/added_IVF86_Flat_nprobe_1.index
+2025-12-08 01:10:34 | INFO | fairseq.tasks.hubert_pretraining | current directory is /Users/arunkumarv/Music/Voice Clone/rvc-webui
+2025-12-08 01:10:34 | INFO | fairseq.tasks.hubert_pretraining | HubertPretrainingTask Config {'_name': 'hubert_pretraining', 'data': 'metadata', 'fine_tuning': False, 'labels': ['km'], 'label_dir': 'label', 'label_rate': 50.0, 'sample_rate': 16000, 'normalize': False, 'enable_padding': False, 'max_keep_size': None, 'max_sample_size': 250000, 'min_sample_size': 32000, 'single_target': False, 'random_crop': True, 'pad_audio': False}
+2025-12-08 01:10:34 | INFO | fairseq.models.hubert.hubert | HubertModel Config: {'_name': 'hubert', 'label_rate': 50.0, 'extractor_mode': default, 'encoder_layers': 12, 'encoder_embed_dim': 768, 'encoder_ffn_embed_dim': 3072, 'encoder_attention_heads': 12, 'activation_fn': gelu, 'layer_type': transformer, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'encoder_layerdrop': 0.05, 'dropout_input': 0.1, 'dropout_features': 0.1, 'final_dim': 256, 'untie_final_proj': True, 'layer_norm_first': False, 'conv_feature_layers': '[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2', 'conv_bias': False, 'logit_temp': 0.1, 'target_glu': False, 'feature_grad_mult': 0.1, 'mask_length': 10, 'mask_prob': 0.8, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'conv_pos': 128, 'conv_pos_groups': 16, 'latent_temp': [2.0, 0.5, 0.999995], 'skip_masked': False, 'skip_nomask': False, 'checkpoint_activations': False, 'required_seq_len_multiple': 2, 'depthwise_conv_kernel_size': 31, 'attn_type': '', 'pos_enc_type': 'abs', 'fp16': False}
+/opt/anaconda3/envs/rvc/lib/python3.10/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.
+ WeightNorm.apply(module, name, dim)
+/opt/anaconda3/envs/rvc/lib/python3.10/multiprocessing/resource_tracker.py:224: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
+ warnings.warn('resource_tracker: There appear to be %d '
diff --git a/run_inference.py b/run_inference.py
new file mode 100644
index 000000000..f77debf23
--- /dev/null
+++ b/run_inference.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+import os
+import sys
+
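+# Single-threaded BLAS/OpenMP plus KMP_DUPLICATE_LIB_OK helps avoid libomp conflicts that commonly crash faiss/torch on macOS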
+os.environ['OMP_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
+
+import torch
+import faiss
+faiss.omp_set_num_threads(1)
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from dotenv import load_dotenv
+load_dotenv()
+
+os.environ['weight_root'] = 'assets/weights'
+os.environ['index_root'] = 'logs'
+os.environ['rmvpe_root'] = 'assets/rmvpe'
+
+print(f"PyTorch version: {torch.__version__}")
+print(f"CUDA available: {torch.cuda.is_available()}")
+print(f"MPS available: {torch.backends.mps.is_available() if hasattr(torch.backends, 'mps') else False}")
+
+from configs.config import Config
+config = Config()
+print(f"Device selected by config: {config.device}")
+
+from infer.modules.vc.modules import VC
+vc_instance = VC(config)
+
+model_name = "Voice_New.pth"
+input_audio = "/Users/arunkumarv/Music/Voice Clone/Voice_convert.mp3"
+output_audio = "/Users/arunkumarv/Music/Voice Clone/rvc-webui/output/Voice_New/converted.wav"
+
+os.makedirs(os.path.dirname(output_audio), exist_ok=True)
+
+print(f"\nLoading model: {model_name}")
+vc_instance.get_vc(model_name)
+
+print(f"Converting audio: {input_audio}")
+print(f"Output will be saved to: {output_audio}")
+
+print("Starting vc_single...")
+sys.stdout.flush()
+
+import soundfile as sf
+
+try:
+ result_message, audio_result = vc_instance.vc_single(
+ sid=0,
+ input_audio_path=input_audio,
+ f0_up_key=0,
+ f0_file=None,
+ f0_method="rmvpe",
+ file_index=f"logs/Voice_New/added_IVF86_Flat_nprobe_1.index",
+ file_index2="",
+ index_rate=0.75,
+ filter_radius=3,
+ resample_sr=0,
+ rms_mix_rate=0.25,
+ protect=0.33
+ )
+
+ print(f"\nResult: {result_message}")
+
+ sample_rate, audio_data = audio_result
+ if audio_data is not None and sample_rate is not None:
+ sf.write(output_audio, audio_data, sample_rate)
+ print(f"✓ Audio saved successfully to: {output_audio}")
+ else:
+ print("✗ Conversion failed!")
+ sys.exit(1)
+except Exception as e:
+ import traceback
+ print(f"Error: {e}")
+ traceback.print_exc()
+ sys.exit(1)
diff --git a/run_inference_api.py b/run_inference_api.py
new file mode 100644
index 000000000..e1fcfc7ad
--- /dev/null
+++ b/run_inference_api.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+from gradio_client import Client
+import os
+import shutil
+
+client = Client("http://localhost:7865")
+
+input_audio = "/Users/arunkumarv/Music/Voice Clone/Voice_convert.mp3"
+output_dir = "/Users/arunkumarv/Music/Voice Clone/rvc-webui/output/Voice_New"
+os.makedirs(output_dir, exist_ok=True)
+
+print(f"Submitting inference request...")
+print(f"Input: {input_audio}")
+print(f"Model: Voice_New.pth")
+print(f"F0 Method: pm")
+
+result = client.predict(
+ spk_item="Voice_New.pth",
+ input_audio0=input_audio,
+ vc_transform0=0, # pitch shift
+ f0_file=None,
+ f0method0="pm", # F0 method
+ file_index1="", # manual index path
+ file_index2="logs/Voice_New/added_IVF86_Flat_nprobe_1.index", # dropdown selection
+ index_rate1=0.75, # retrieval mix
+ filter_radius0=3, # median filter
+ resample_sr0=0, # output sample rate
+ rms_mix_rate0=0.25, # volume envelope
+ protect0=0.33, # consonant protection
+ api_name="/infer_convert"
+)
+
+output_message, output_audio_tuple = result
+print(f"\nResult: {output_message}")
+
+if output_audio_tuple and len(output_audio_tuple) > 1:
+ output_path = os.path.join(output_dir, "converted.wav")
+ if isinstance(output_audio_tuple, tuple) and len(output_audio_tuple) == 2:
+ sr, audio_file = output_audio_tuple
+ shutil.copy(audio_file, output_path)  # gradio returns the converted audio as a temp file path
+ print(f"✓ Audio converted successfully! Sample rate: {sr} Hz")
+ print(f"Output: {output_path}")
+ else:
+ print(f"Unexpected output format: {output_audio_tuple}")
+else:
+ print("✗ Conversion failed!")
diff --git a/setup-doc.md b/setup-doc.md
new file mode 100644
index 000000000..153f17980
--- /dev/null
+++ b/setup-doc.md
@@ -0,0 +1,76 @@
+# RVC WebUI Setup Documentation
+
+## Environment
+
+- macOS (Apple Silicon)
+- Python 3.10 (Conda)
+- PyTorch with MPS support
+
+## Problems and Solutions
+
+### 1. fairseq Installation Failure
+
+**Error**: `omegaconf` metadata parsing error when installing fairseq with pip 24.1 or newer (the environment had pip 25.x)
+
+**Fix**:
+```bash
+pip install "pip<24.1"
+pip install fairseq==0.12.2
+```
+
+### 2. Gradio Version Mismatch
+
+**Error**: `concurrency_count` parameter not recognized (wrong gradio version installed)
+
+**Fix**:
+```bash
+pip install gradio==3.34.0
+```
+
+### 3. gradio_client.serializing Module Not Found
+
+**Error**: `ModuleNotFoundError: No module named 'gradio_client.serializing'`
+
+**Cause**: Newer gradio-client releases removed the `serializing` module that gradio 3.34.0 still imports
+
+**Fix**:
+```bash
+pip install gradio-client==0.2.7
+```
+
+## Working Installation Sequence
+
+```bash
+conda create -n rvc python=3.10 -y
+conda activate rvc
+
+pip install "pip<24.1"
+pip install torch torchvision torchaudio
+pip install -r requirements.txt
+pip install fairseq==0.12.2
+pip install gradio==3.34.0
+pip install gradio-client==0.2.7
+
+python tools/download_models.py
+python infer-web.py
+```
+
+## Verification
+
+```bash
+python -c "import torch; print(torch.backends.mps.is_available())"
+python -c "from gradio_client.serializing import Serializable; print('OK')"
+```
+
+## Access
+
+WebUI runs at: http://localhost:7865
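+
+A quick sketch to confirm the Gradio API is reachable from Python (assumes the WebUI is already running and gradio-client 0.2.7 is installed as above):
+
+```python
+from gradio_client import Client
+
+client = Client("http://localhost:7865")
+client.view_api()  # lists available endpoints such as /infer_convert
+```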
diff --git a/temp_download_dialects.py b/temp_download_dialects.py
new file mode 100644
index 000000000..c93b14491
--- /dev/null
+++ b/temp_download_dialects.py
@@ -0,0 +1,33 @@
+
+import os
+from datasets import load_dataset
+from pathlib import Path
+import soundfile as sf
+
+print("Loading English Dialects dataset...")
+ds = load_dataset("ylacombe/english_dialects", split="train", streaming=True)
+
+datasets_dir = Path("datasets/accent_non_native")
+datasets_dir.mkdir(parents=True, exist_ok=True)
+
+count = 0
+max_samples = 150
+
+# Target dialects for "non-native" feel (regional accents)
+target_dialects = ["scottish", "irish", "welsh", "northern"]
+
+for sample in ds:
+ dialect = sample.get("dialect", "").lower()
+
+ if any(d in dialect for d in target_dialects) and count < max_samples:
+ audio = sample["audio"]
+ out_path = datasets_dir / f"{dialect}_{count}.wav"
+
+ sf.write(str(out_path), audio["array"], audio["sampling_rate"])
+ count += 1
+ print(f"Saved {out_path.name} (total: {count})")
+
+ if count >= max_samples:
+ break
+
+print(f"Downloaded {count} dialect samples")
diff --git a/temp_download_genshin.py b/temp_download_genshin.py
new file mode 100644
index 000000000..27e2e38eb
--- /dev/null
+++ b/temp_download_genshin.py
@@ -0,0 +1,38 @@
+
+import os
+from datasets import load_dataset
+from pathlib import Path
+import soundfile as sf
+
+print("Loading Genshin Voice dataset...")
+ds = load_dataset("simon3000/genshin-voice", split="train", streaming=True)
+
+datasets_dir = Path("datasets/anime_airy")
+datasets_dir.mkdir(parents=True, exist_ok=True)
+
+count = 0
+max_samples = 150
+
+# Target characters with airy/cute voices (English)
+target_chars = ["paimon", "barbara", "kokomi", "nahida", "klee", "qiqi", "diona"]
+
+for sample in ds:
+ speaker = str(sample.get("speaker", "")).lower()
+ lang = sample.get("language", "")
+
+ # Only English samples
+ if lang != "en":
+ continue
+
+ if any(char in speaker for char in target_chars) and count < max_samples:
+ audio = sample["audio"]
+ out_path = datasets_dir / f"{speaker.replace(' ', '_')}_{count}.wav"
+
+ sf.write(str(out_path), audio["array"], audio["sampling_rate"])
+ count += 1
+ print(f"Saved {out_path.name} (total: {count})")
+
+ if count >= max_samples:
+ break
+
+print(f"Downloaded {count} anime voice samples")
diff --git a/temp_download_hifi.py b/temp_download_hifi.py
new file mode 100644
index 000000000..7724e5af6
--- /dev/null
+++ b/temp_download_hifi.py
@@ -0,0 +1,46 @@
+
+import os
+from datasets import load_dataset
+from pathlib import Path
+import soundfile as sf
+
+print("Loading Hi-Fi TTS dataset...")
+ds = load_dataset("MikhailT/hifi-tts", split="train", streaming=True)
+
+datasets_dir = Path("datasets")
+
+# Speaker ID to voice type mapping for HiFi-TTS
+# HiFi has 10 speakers total
+voice_map = {
+ "92": "male_low", # Deep male
+ "6097": "male_mid", # Mid male
+ "6670": "female_low", # Lower female
+ "6671": "female_high", # Higher female
+ "8051": "singing_male",
+ "9017": "singing_female",
+}
+
+counts = {k: 0 for k in set(voice_map.values())}
+max_per_type = 100
+
+for sample in ds:
+ speaker = str(sample.get("speaker", ""))
+
+ if speaker in voice_map:
+ voice_type = voice_map[speaker]
+
+ if counts[voice_type] < max_per_type:
+ out_dir = datasets_dir / voice_type
+ out_dir.mkdir(parents=True, exist_ok=True)
+
+ audio = sample["audio"]
+ out_path = out_dir / f"hifi_{speaker}_{counts[voice_type]}.wav"
+
+ sf.write(str(out_path), audio["array"], audio["sampling_rate"])
+ counts[voice_type] += 1
+ print(f"Saved {out_path.name} ({voice_type}: {counts[voice_type]})")
+
+ if all(c >= max_per_type for c in counts.values()):
+ break
+
+print(f"Final counts: {counts}")
diff --git a/temp_download_libritts.py b/temp_download_libritts.py
new file mode 100644
index 000000000..b8d7dd48f
--- /dev/null
+++ b/temp_download_libritts.py
@@ -0,0 +1,45 @@
+
+import os
+from datasets import load_dataset
+from pathlib import Path
+import soundfile as sf
+
+print("Loading LibriTTS dataset (this may take a while)...")
+ds = load_dataset("mythicinfinity/libritts", "clean", split="train.clean.100", streaming=True)
+
+# Sample speakers - take first 50 samples per target voice type
+# LibriTTS speaker IDs are in the 'speaker_id' column
+target_speakers = {
+ "male_low": ["19", "26", "1272"], # Deep male voices
+ "male_mid": ["32", "40", "1089"], # Mid-range male
+ "female_low": ["87", "103", "1284"], # Lower female
+ "female_high": ["121", "237", "3570"], # Higher female
+}
+
+datasets_dir = Path("datasets")
+counts = {k: 0 for k in target_speakers}
+max_per_type = 100 # Max samples per voice type
+
+for sample in ds:
+ speaker = str(sample.get("speaker_id", ""))
+
+ for voice_type, speakers in target_speakers.items():
+ if speaker in speakers and counts[voice_type] < max_per_type:
+ out_dir = datasets_dir / voice_type
+ out_dir.mkdir(parents=True, exist_ok=True)
+
+ audio = sample["audio"]
+ out_path = out_dir / f"{speaker}_{sample['id']}.wav"
+
+ # Save audio
+ sf.write(str(out_path), audio["array"], audio["sampling_rate"])
+
+ counts[voice_type] += 1
+ print(f"Saved {out_path.name} ({voice_type}: {counts[voice_type]})")
+
+ # Check if we have enough
+ if all(c >= max_per_type for c in counts.values()):
+ print("Collected enough samples!")
+ break
+
+print(f"Final counts: {counts}")
diff --git a/tools/audio_preprocessor.py b/tools/audio_preprocessor.py
new file mode 100644
index 000000000..7e564169a
--- /dev/null
+++ b/tools/audio_preprocessor.py
@@ -0,0 +1,84 @@
+import os
+import argparse
+import librosa
+import soundfile as sf
+import numpy as np
+from pydub import AudioSegment
+from pydub.silence import split_on_silence
+from tqdm import tqdm
+
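+# Example invocation (sketch; input/output paths are illustrative):
+#   python tools/audio_preprocessor.py -i raw_recordings/ -o datasets/male_low --sr 40000 --len 10000
+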
+def process_audio(input_path, output_dir, sr=40000, min_silence_len=500, silence_thresh=-40, chunk_len=10000):
+ """
+ Process audio file: convert to wav, normalize, remove silence, split into chunks
+ """
+ filename = os.path.splitext(os.path.basename(input_path))[0]
+
+ print(f"Processing {input_path}...")
+
+ # Load audio
+ try:
+ audio = AudioSegment.from_file(input_path)
+ except Exception as e:
+ print(f"Error loading {input_path}: {e}")
+ return
+
+ # Normalize
+ audio = audio.normalize()
+
+ # Split on silence
+ chunks = split_on_silence(
+ audio,
+ min_silence_len=min_silence_len,
+ silence_thresh=silence_thresh,
+ keep_silence=100
+ )
+
+ # Combine small chunks to reach target length
+ output_chunks = []
+ current_chunk = AudioSegment.empty()
+
+ for chunk in chunks:
+ if len(current_chunk) + len(chunk) < chunk_len:
+ current_chunk += chunk
+ else:
+ output_chunks.append(current_chunk)
+ current_chunk = chunk
+
+ if len(current_chunk) > 0:
+ output_chunks.append(current_chunk)
+
+ # Save chunks
+ os.makedirs(output_dir, exist_ok=True)
+
+ for i, chunk in enumerate(output_chunks):
+ # Convert to target sample rate
+ chunk = chunk.set_frame_rate(sr).set_channels(1)
+
+ # Export
+ out_name = f"{filename}_{i:03d}.wav"
+ out_path = os.path.join(output_dir, out_name)
+ chunk.export(out_path, format="wav")
+
+ print(f"Saved {len(output_chunks)} chunks to {output_dir}")
+
+def main():
+ parser = argparse.ArgumentParser(description="Audio Dataset Preprocessor for RVC")
+ parser.add_argument("--input", "-i", required=True, help="Input file or directory")
+ parser.add_argument("--output", "-o", required=True, help="Output directory")
+ parser.add_argument("--sr", type=int, default=40000, help="Target sample rate (default: 40000)")
+ parser.add_argument("--len", type=int, default=10000, help="Target chunk length in ms (default: 10000)")
+
+ args = parser.parse_args()
+
+ if os.path.isfile(args.input):
+ process_audio(args.input, args.output, sr=args.sr, chunk_len=args.len)
+ elif os.path.isdir(args.input):
+ files = [f for f in os.listdir(args.input) if f.lower().endswith(('.wav', '.mp3', '.flac', '.m4a', '.ogg'))]
+ for f in tqdm(files):
+ process_audio(os.path.join(args.input, f), args.output, sr=args.sr, chunk_len=args.len)
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/download_datasets.py b/tools/download_datasets.py
new file mode 100644
index 000000000..85407ea82
--- /dev/null
+++ b/tools/download_datasets.py
@@ -0,0 +1,246 @@
+"""
+Simplified dataset downloader - downloads voice data directly.
+"""
+import os
+import sys
+from pathlib import Path
+
+# Ensure we have datasets library
+try:
+ from datasets import load_dataset
+ import soundfile as sf
+except ImportError:
+ print("Installing required packages...")
+ os.system("pip install datasets soundfile")
+ from datasets import load_dataset
+ import soundfile as sf
+
+# Setup paths
+SCRIPT_DIR = Path(__file__).parent
+PROJECT_ROOT = SCRIPT_DIR.parent
+DATASETS_DIR = PROJECT_ROOT / "datasets"
+
+def download_libritts():
+ """Download LibriTTS samples for male/female voices."""
+ print("\n=== Downloading LibriTTS (Male/Female Voices) ===")
+
+ ds = load_dataset("mythicinfinity/libritts", "clean", split="train.clean.100", streaming=True)
+
+ # Speaker ID to voice type mapping
+ speaker_voice_map = {
+ "19": "male_low",
+ "26": "male_low",
+ "1272": "male_low",
+ "32": "male_mid",
+ "40": "male_mid",
+ "1089": "male_mid",
+ "87": "female_low",
+ "103": "female_low",
+ "1284": "female_low",
+ "121": "female_high",
+ "237": "female_high",
+ "3570": "female_high",
+ }
+
+ counts = {}
+ for vt in set(speaker_voice_map.values()):
+ counts[vt] = 0
+ (DATASETS_DIR / vt).mkdir(parents=True, exist_ok=True)
+
+ max_per_type = 100
+
+ for sample in ds:
+ speaker = str(sample.get("speaker_id", ""))
+
+ if speaker in speaker_voice_map:
+ voice_type = speaker_voice_map[speaker]
+
+ if counts[voice_type] < max_per_type:
+ out_dir = DATASETS_DIR / voice_type
+ audio = sample["audio"]
+ out_path = out_dir / f"libritts_{speaker}_{counts[voice_type]}.wav"
+
+ sf.write(str(out_path), audio["array"], audio["sampling_rate"])
+ counts[voice_type] += 1
+ print(f" {voice_type}: {counts[voice_type]}/{max_per_type}", end="\r")
+
+ if all(c >= max_per_type for c in counts.values()):
+ break
+
+ print(f"\nLibriTTS complete: {counts}")
+
+def download_dialects():
+ """Download English dialect samples."""
+ print("\n=== Downloading English Dialects (Accents) ===")
+
+ out_dir = DATASETS_DIR / "accent_non_native"
+ out_dir.mkdir(parents=True, exist_ok=True)
+
+ count = 0
+ max_samples = 150
+ configs = ["scottish_male", "scottish_female", "irish_male", "welsh_male", "welsh_female", "northern_male"]
+
+ for config in configs:
+ if count >= max_samples:
+ break
+ try:
+ ds = load_dataset("ylacombe/english_dialects", config, split="train", streaming=True)
+ for sample in ds:
+ if count >= max_samples:
+ break
+ audio = sample["audio"]
+ out_path = out_dir / f"dialect_{config}_{count}.wav"
+ sf.write(str(out_path), audio["array"], audio["sampling_rate"])
+ count += 1
+ print(f" accent_non_native: {count}/{max_samples}", end="\r")
+ except Exception as e:
+ print(f" Error with {config}: {e}")
+
+ print(f"\nDialects complete: {count} samples")
+
+def download_genshin():
+ """Download Genshin voices for anime style."""
+ print("\n=== Downloading Genshin Voices (Anime Style) ===")
+
+ ds = load_dataset("simon3000/genshin-voice", split="train", streaming=True)
+
+ out_dir = DATASETS_DIR / "anime_airy"
+ out_dir.mkdir(parents=True, exist_ok=True)
+
+ count = 0
+ max_samples = 150
+ target_chars = ["paimon", "barbara", "kokomi", "nahida", "klee", "qiqi", "diona"]
+
+ for sample in ds:
+ speaker = str(sample.get("speaker", "")).lower()
+ lang = sample.get("language", "")
+
+ if lang != "en":
+ continue
+
+ if any(char in speaker for char in target_chars) and count < max_samples:
+ audio = sample["audio"]
+ clean_speaker = speaker.replace(" ", "_").replace("/", "_")
+ out_path = out_dir / f"genshin_{clean_speaker}_{count}.wav"
+
+ sf.write(str(out_path), audio["array"], audio["sampling_rate"])
+ count += 1
+ print(f" anime_airy: {count}/{max_samples}", end="\r")
+
+ if count >= max_samples:
+ break
+
+ print(f"\nGenshin complete: {count} samples")
+
+def download_hifi():
+ """Download Hi-Fi TTS for singing voices."""
+ print("\n=== Downloading Hi-Fi TTS (Singing/High Quality) ===")
+
+ ds = load_dataset("MikhailT/hifi-tts", "clean", split="train", streaming=True)
+
+ # Map speakers to voice types
+ speaker_map = {
+ "92": "singing_male",
+ "6097": "singing_male",
+ "6670": "singing_female",
+ "6671": "singing_female",
+ }
+
+ counts = {}
+ for vt in set(speaker_map.values()):
+ counts[vt] = 0
+ (DATASETS_DIR / vt).mkdir(parents=True, exist_ok=True)
+
+ max_per_type = 100
+
+ for sample in ds:
+ speaker = str(sample.get("speaker", ""))
+
+ if speaker in speaker_map:
+ voice_type = speaker_map[speaker]
+
+ if counts[voice_type] < max_per_type:
+ out_dir = DATASETS_DIR / voice_type
+ audio = sample["audio"]
+ out_path = out_dir / f"hifi_{speaker}_{counts[voice_type]}.wav"
+
+ sf.write(str(out_path), audio["array"], audio["sampling_rate"])
+ counts[voice_type] += 1
+ print(f" {voice_type}: {counts[voice_type]}/{max_per_type}", end="\r")
+
+ if all(c >= max_per_type for c in counts.values()):
+ break
+
+ print(f"\nHi-Fi TTS complete: {counts}")
+
+def print_summary():
+ """Print download summary."""
+ print("\n" + "=" * 50)
+ print("DOWNLOAD SUMMARY")
+ print("=" * 50)
+
+ voice_types = [
+ "male_low", "male_mid", "female_low", "female_high",
+ "anime_airy", "accent_non_native", "singing_male", "singing_female",
+ "child", "elderly"
+ ]
+
+ total = 0
+ for vt in voice_types:
+ vt_dir = DATASETS_DIR / vt
+ if vt_dir.exists():
+ files = list(vt_dir.glob("*.wav"))
+ count = len(files)
+ total += count
+ status = "✓" if count > 0 else "✗"
+ print(f" {status} {vt}: {count} files")
+ else:
+ print(f" ✗ {vt}: 0 files")
+
+ print(f"\nTotal: {total} audio files downloaded")
+ print("\nNote: 'child' and 'elderly' need manual data - not available in these datasets.")
+
+def main():
+ print("=" * 50)
+ print("RVC Voice Dataset Downloader")
+ print("=" * 50)
+ print(f"Output: {DATASETS_DIR}")
+
+ # Create all directories
+ for vt in ["male_low", "male_mid", "female_low", "female_high",
+ "anime_airy", "accent_non_native", "singing_male", "singing_female",
+ "child", "elderly"]:
+ (DATASETS_DIR / vt).mkdir(parents=True, exist_ok=True)
+
+ # Download each dataset
+ try:
+ download_libritts()
+ except Exception as e:
+ print(f"Error downloading LibriTTS: {e}")
+
+ try:
+ download_dialects()
+ except Exception as e:
+ print(f"Error downloading Dialects: {e}")
+
+ try:
+ download_genshin()
+ except Exception as e:
+ print(f"Error downloading Genshin: {e}")
+
+ try:
+ download_hifi()
+ except Exception as e:
+ print(f"Error downloading Hi-Fi TTS: {e}")
+
+ print_summary()
+
+ print("\n" + "=" * 50)
+ print("NEXT STEPS")
+ print("=" * 50)
+ print("1. Review downloaded files in datasets/ folder")
+ print("2. Train models: python tools/train_batch.py --voice male_low")
+ print("3. Run experiments: python tools/run_experiments_batch.py")
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/run_experiments_batch.py b/tools/run_experiments_batch.py
new file mode 100644
index 000000000..b1cd65b26
--- /dev/null
+++ b/tools/run_experiments_batch.py
@@ -0,0 +1,93 @@
+import os
+import subprocess
+import sys
+
+# Add root to path
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+def main():
+ # List of voices as defined in Task 7/10
+ voices = [
+ 'male_low', 'male_mid', 'female_low', 'female_high',
+ 'anime_airy', 'accent_non_native', 'singing_male', 'singing_female',
+ 'child', 'elderly'
+ ]
+
+ # Base paths
+ weights_dir = os.path.join(now_dir, "assets", "weights")
+ datasets_dir = os.path.join(now_dir, "datasets")
+ experiments_dir = os.path.join(now_dir, "experiments")
+
+ # Path to test_grid.py
+ test_grid_script = os.path.join(now_dir, "tools", "test_grid.py")
+
+ print(f"Starting Batch Experiments for {len(voices)} voices...")
+
+ for voice in voices:
+ print(f"\n--- Processing Voice: {voice} ---")
+
+ # Check for model
+ model_name = f"{voice}.pth"
+ model_path = os.path.join(weights_dir, model_name)
+
+ if not os.path.exists(model_path):
+ print(f"Skipping {voice}: Model not found at {model_path}")
+ continue
+
+ # Check for test audio
+ # A reference audio clip is needed to run inference on. Ideally this would come
+ # from a held-out 'test_samples' set; for now, look for 'test.wav' in the voice's
+ # dataset folder and otherwise fall back to the first .wav file found there.
+ # (The fallback means inference may run on one of the model's own training clips.)
+
+ voice_dataset_dir = os.path.join(datasets_dir, voice)
+ input_audio = None
+
+ if os.path.exists(voice_dataset_dir):
+ potential_files = [f for f in os.listdir(voice_dataset_dir) if f.endswith(".wav")]
+ if "test.wav" in potential_files:
+ input_audio = os.path.join(voice_dataset_dir, "test.wav")
+ elif len(potential_files) > 0:
+ input_audio = os.path.join(voice_dataset_dir, potential_files[0])
+
+ if not input_audio:
+ print(f"Skipping {voice}: No input audio found in {voice_dataset_dir}")
+ continue
+
+ # Check for index file (optional but recommended)
+ # Usually located in logs/{voice}/added_*.index
+ # We need to find it.
+ logs_dir = os.path.join(now_dir, "logs", voice)
+ index_path = ""
+ if os.path.exists(logs_dir):
+ for f in os.listdir(logs_dir):
+ if f.startswith("added_") and f.endswith(".index"):
+ index_path = os.path.join(logs_dir, f)
+ break
+
+ print(f"Model: {model_name}")
+ print(f"Input: {input_audio}")
+ print(f"Index: {index_path if index_path else 'None'}")
+
+ # Run test_grid.py
+ cmd = [
+ sys.executable, test_grid_script,
+ "--model_name", model_name,
+ "--input_path", input_audio,
+ "--output_dir", experiments_dir
+ ]
+
+ if index_path:
+ cmd.extend(["--index_path", index_path])
+
+ try:
+ subprocess.run(cmd, check=True)
+ print(f"Successfully ran experiments for {voice}")
+ except subprocess.CalledProcessError as e:
+ print(f"Error running experiments for {voice}: {e}")
+
+ print("\nBatch Experiments Completed.")
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/test_grid.py b/tools/test_grid.py
new file mode 100644
index 000000000..7f8728289
--- /dev/null
+++ b/tools/test_grid.py
@@ -0,0 +1,131 @@
+import itertools
+import argparse
+import os
+import sys
+import json
+import time
+from scipy.io import wavfile
+
+# Add root to path
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from dotenv import load_dotenv
+from configs.config import Config
+from infer.modules.vc.modules import VC
+
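+# Example invocation (sketch; the model and index file names are illustrative):
+#   python tools/test_grid.py --model_name male_low.pth --input_path datasets/male_low/test.wav \
+#       --index_path logs/male_low/added_IVF256_Flat_nprobe_1_male_low_v2.index
+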
+def main():
+ parser = argparse.ArgumentParser(description="Run RVC inference across a parameter grid.")
+ parser.add_argument("--model_name", required=True, help="Name of the model (must be in assets/weights)")
+ parser.add_argument("--input_path", required=True, help="Path to reference audio file")
+ parser.add_argument("--index_path", default="", help="Path to .index file")
+ parser.add_argument("--output_dir", default="experiments", help="Base directory for output")
+ parser.add_argument("--f0up_key", type=int, default=0, help="Pitch shift (semitones)")
+
+ args = parser.parse_args()
+
+ # Load config and VC
+ load_dotenv()
+ config = Config()
+ vc = VC(config)
+ vc.get_vc(args.model_name)
+
+ # Define Grid
+ # You can modify this grid in the code or make it configurable via JSON later
+ grid = {
+ "f0method": ["rmvpe", "pm"], # "harvest", "crepe" are slower
+ "index_rate": [0.0, 0.5, 0.75, 1.0],
+ "filter_radius": [3],
+ "rms_mix_rate": [0.25, 1.0],
+ "protect": [0.33],
+ "resample_sr": [0], # 0 means no resampling
+ }
+
+ # Prepare output directory
+ model_slug = os.path.splitext(args.model_name)[0]
+ audio_slug = os.path.splitext(os.path.basename(args.input_path))[0]
+ timestamp = int(time.time())
+ experiment_dir = os.path.join(args.output_dir, model_slug, audio_slug, str(timestamp))
+ os.makedirs(experiment_dir, exist_ok=True)
+
+ print(f"Starting Grid Search Experiment")
+ print(f"Model: {args.model_name}")
+ print(f"Input: {args.input_path}")
+ print(f"Output: {experiment_dir}")
+
+ # Generate combinations
+ keys = grid.keys()
+ values = grid.values()
+ combinations = list(itertools.product(*values))
+
+ results = []
+
+ total = len(combinations)
+ print(f"Total combinations to run: {total}")
+
+ for i, combo in enumerate(combinations):
+ params = dict(zip(keys, combo))
+ print(f"[{i+1}/{total}] Running with {params}")
+
+ # Construct output filename
+ # e.g. rmvpe_idx0.5_rms1.0.wav
+ filename_parts = [f"{k}{v}" for k, v in params.items()]
+ filename = "_".join(filename_parts) + ".wav"
+ output_path = os.path.join(experiment_dir, filename)
+
+ # Run Inference
+ try:
+ info, opt = vc.vc_single(
+ 0, # sid
+ args.input_path,
+ args.f0up_key,
+ None, # f0_file
+ params["f0method"],
+ args.index_path,
+ None, # file_index2
+ params["index_rate"],
+ params["filter_radius"],
+ params["resample_sr"],
+ params["rms_mix_rate"],
+ params["protect"]
+ )
+
+ if "Success" in info:
+ tgt_sr, audio_opt = opt
+ wavfile.write(output_path, tgt_sr, audio_opt)
+ results.append({
+ "params": params,
+ "output_file": filename,
+ "status": "success"
+ })
+ else:
+ print(f"Error: {info}")
+ results.append({
+ "params": params,
+ "status": "failed",
+ "error": info
+ })
+
+ except Exception as e:
+ print(f"Exception: {e}")
+ results.append({
+ "params": params,
+ "status": "error",
+ "error": str(e)
+ })
+
+ # Save metadata
+ metadata_path = os.path.join(experiment_dir, "metadata.json")
+ with open(metadata_path, "w") as f:
+ json.dump({
+ "model": args.model_name,
+ "input_path": args.input_path,
+ "f0up_key": args.f0up_key,
+ "grid": grid,
+ "results": results
+ }, f, indent=2)
+
+ print(f"Experiment completed. Results saved to {experiment_dir}")
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/train_batch.py b/tools/train_batch.py
new file mode 100644
index 000000000..90604efb2
--- /dev/null
+++ b/tools/train_batch.py
@@ -0,0 +1,197 @@
+import os
+import sys
+import time
+import json
+import argparse
+import subprocess
+from pathlib import Path
+
+# Add project root to path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from configs.config import Config
+
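+# Example invocation (sketch): train every dataset folder, or just one voice:
+#   python tools/train_batch.py --voice male_low --epochs 50 --batch_size 8
+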
+def run_command(cmd, cwd=None):
+ print(f"Running: {cmd}")
+ process = subprocess.Popen(
+ cmd,
+ shell=True,
+ cwd=cwd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,  # merge stderr into stdout so the read loop below cannot deadlock
+ universal_newlines=True
+ )
+
+ # Stream output
+ while True:
+ output = process.stdout.readline()
+ if output == '' and process.poll() is not None:
+ break
+ if output:
+ print(output.strip())
+
+ rc = process.poll()
+ return rc
+
+def train_voice_model(voice_name, dataset_path, epochs=50, batch_size=8, sample_rate="40k", version="v2", gpu_id="0"):
+ """
+ Automates the RVC training pipeline for a single voice model.
+ """
+ print(f"\n{'='*50}")
+ print(f"Starting training for: {voice_name}")
+ print(f"{'='*50}\n")
+
+ root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ logs_dir = os.path.join(root_dir, "logs", voice_name)
+
+ # 1. Preprocessing
+ print("\n[Step 1/4] Preprocessing Data...")
+ cmd_preprocess = f"python infer/modules/train/preprocess.py \"{dataset_path}\" {sample_rate.replace('k','000')} 2 \"{logs_dir}\" False 3.0"
+ if run_command(cmd_preprocess, cwd=root_dir) != 0:
+ print("Error in preprocessing")
+ return False
+
+ # 2. Feature Extraction
+ print("\n[Step 2/4] Extracting Features...")
+ # F0 extraction (rmvpe_gpu)
+ cmd_f0 = f"python infer/modules/train/extract/extract_f0_rmvpe.py 1 0 0 \"{logs_dir}\" True"
+ if run_command(cmd_f0, cwd=root_dir) != 0:
+ print("Error in F0 extraction")
+ return False
+
+ # Feature extraction (HuBERT)
+ cmd_feat = f"python infer/modules/train/extract_feature_print.py {gpu_id} 1 0 0 \"{logs_dir}\" {version} False"
+ if run_command(cmd_feat, cwd=root_dir) != 0:
+ print("Error in feature extraction")
+ return False
+
+ # 3. Training Model
+ print("\n[Step 3/4] Training Model...")
+ # Determine pretrained models
+ if version == "v1":
+ pg = f"assets/pretrained/f0G{sample_rate}.pth"
+ pd = f"assets/pretrained/f0D{sample_rate}.pth"
+ else:
+ pg = f"assets/pretrained_v2/f0G{sample_rate}.pth"
+ pd = f"assets/pretrained_v2/f0D{sample_rate}.pth"
+
+ cmd_train = (
+ f"python infer/modules/train/train.py -e \"{voice_name}\" -sr {sample_rate} -f0 1 -bs {batch_size} "
+ f"-g {gpu_id} -te {epochs} -se 10 -pg \"{pg}\" -pd \"{pd}\" -l 0 -c 0 -sw 1 -v {version}"
+ )
+
+ if run_command(cmd_train, cwd=root_dir) != 0:
+ print("Error in training")
+ return False
+
+ # 4. Training Index
+ print("\n[Step 4/4] Training Index...")
+
+ index_script = f"""
+import sys
+import os
+import numpy as np
+import faiss
+from sklearn.cluster import MiniBatchKMeans
+
+exp_dir = "{logs_dir}"
+version = "{version}"
+feature_dir = os.path.join(exp_dir, "3_feature256" if version == "v1" else "3_feature768")
+
+if not os.path.exists(feature_dir):
+ print("Feature dir not found")
+ sys.exit(1)
+
+listdir_res = list(os.listdir(feature_dir))
+if len(listdir_res) == 0:
+ print("No features found")
+ sys.exit(1)
+
+npys = []
+for name in sorted(listdir_res):
+ phone = np.load(os.path.join(feature_dir, name))
+ npys.append(phone)
+
+big_npy = np.concatenate(npys, 0)
+big_npy_idx = np.arange(big_npy.shape[0])
+np.random.shuffle(big_npy_idx)
+big_npy = big_npy[big_npy_idx]
+
+if big_npy.shape[0] > 2e5:
+ big_npy = (
+ MiniBatchKMeans(
+ n_clusters=10000,
+ batch_size=256 * 8,
+ compute_labels=False,
+ init="random",
+ )
+ .fit(big_npy)
+ .cluster_centers_
+ )
+
+np.save(os.path.join(exp_dir, "total_fea.npy"), big_npy)
+n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
+index = faiss.index_factory(256 if version == "v1" else 768, "IVF%s,Flat" % n_ivf)
+index_ivf = faiss.extract_index_ivf(index)
+index_ivf.nprobe = 1
+index.train(big_npy)
+faiss.write_index(
+ index,
+ os.path.join(exp_dir, f"trained_IVF{{n_ivf}}_Flat_nprobe_1_{voice_name}_{version}.index")
+)
+
+batch_size_add = 8192
+for i in range(0, big_npy.shape[0], batch_size_add):
+ index.add(big_npy[i : i + batch_size_add])
+
+faiss.write_index(
+ index,
+ os.path.join(exp_dir, f"added_IVF{{n_ivf}}_Flat_nprobe_1_{voice_name}_{version}.index")
+)
+print("Index training complete")
+"""
+
+ # Write the temp script into the project root, where run_command executes it
+ temp_script_path = os.path.join(root_dir, "temp_index_train.py")
+ with open(temp_script_path, "w") as f:
+ f.write(index_script)
+
+ if run_command("python temp_index_train.py", cwd=root_dir) != 0:
+ print("Error in index training")
+ os.remove(temp_script_path)
+ return False
+ os.remove(temp_script_path)
+ print(f"\nSuccessfully trained model for {voice_name}!")
+ return True
+
+def main():
+ parser = argparse.ArgumentParser(description="Batch Train RVC Models")
+ parser.add_argument("--voice", type=str, help="Specific voice name to train (folder name in datasets/)")
+ parser.add_argument("--epochs", type=int, default=50, help="Number of epochs")
+ parser.add_argument("--batch_size", type=int, default=8, help="Batch size")
+
+ args = parser.parse_args()
+
+ datasets_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "datasets")
+
+ if args.voice:
+ voices = [args.voice]
+ else:
+ voices = [d for d in os.listdir(datasets_dir) if os.path.isdir(os.path.join(datasets_dir, d))]
+
+ print(f"Found {len(voices)} voices to train: {voices}")
+
+ for voice in voices:
+ dataset_path = os.path.join(datasets_dir, voice)
+ # Check if dataset has files
+ if not os.path.exists(dataset_path) or not os.listdir(dataset_path):
+ print(f"Skipping {voice} - no data found")
+ continue
+
+ train_voice_model(voice, dataset_path, epochs=args.epochs, batch_size=args.batch_size)
+
+if __name__ == "__main__":
+ main()