Spaces:
Runtime error
Runtime error
File size: 3,843 Bytes
12e4312 eb7e85b 12e4312 eb7e85b 67e06ea eb7e85b 67e06ea eb7e85b 67e06ea eb7e85b 67e06ea eb7e85b 67e06ea eb7e85b 12e4312 0752193 eb7e85b 0752193 eb7e85b 12e4312 eb7e85b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import json
import os
import subprocess
from pathlib import Path
import gradio as gr
import librosa
import numpy as np
import torch
from demucs.apply import apply_model
from demucs.pretrained import DEFAULT_MODEL, get_model
from huggingface_hub import hf_hub_download, list_repo_files
from so_vits_svc_fork.hparams import HParams
from so_vits_svc_fork.inference.core import Svc
###################################################################
# REPLACE THESE VALUES TO CHANGE THE MODEL REPO/CKPT NAME/SETTINGS
###################################################################

# The Hugging Face Hub repo IDs - edit this list to change the models; it can
# be replaced with any repo that holds an already-trained model.
repo_ids = ["nijisakai/sunyanzi", "kevinwang676/jay"]

# If None, uses the latest ckpt in the repo.
ckpt_name = None

# If None, uses "kmeans.pt" if it exists in the repo.
cluster_model_name = None

# Set the default f0 type to use - use the one it was trained on.
# The default for so-vits-svc-fork is "dio".
# Options: "crepe", "crepe-tiny", "parselmouth", "dio", "harvest"
default_f0_method = "crepe"

# The default ratio of cluster inference to SVC inference.
# If cluster_model_name is not found in the repo, this is set to 0.
default_cluster_infer_ratio = 0.5

# Limit on duration of audio at inference time. Increase if you can.
# In this parent app, we set the limit with an env var to 30 seconds.
# If you didn't set the env var and you go OOM, try changing 9e9 to <=300ish.
# The value goes through float() first so that settings such as "30.5" or
# "3e2" are accepted (plain int() rejects those strings); the result is
# truncated to whole seconds.
duration_limit = int(float(os.environ.get("MAX_DURATION_SECONDS", 9e9)))
###################################################################
def _latest_generator_name(repo_files, repo_id):
    """Return the name of the highest-numbered ``G_<step>.pth`` file.

    Ex. if the repo has: G_0.pth, G_100.pth, G_200.pth, we'd use G_200.pth

    Args:
        repo_files: File names listed for the repo.
        repo_id: Repo the listing came from; only used in the error message.

    Raises:
        ValueError: If the listing holds no ``G_*.pth`` checkpoint.
    """
    steps = [
        int(Path(name).stem.split("_")[1])
        for name in repo_files
        if name.startswith("G_") and name.endswith(".pth")
    ]
    if not steps:
        raise ValueError(f"No generator checkpoint (G_*.pth) found in {repo_id}")
    return f"G_{max(steps)}.pth"


# Snapshot the user-configured values. The loop below rebinds the config names
# for every repo, and without the snapshot the values resolved for the first
# repo (its latest checkpoint name, a cluster ratio forced to 0) would leak
# into every later repo.
configured_ckpt_name = ckpt_name
configured_cluster_model_name = cluster_model_name
configured_cluster_infer_ratio = default_cluster_infer_ratio

# Neither of these depends on the repo, so they are created once rather than
# once per loop iteration.
device = "cuda" if torch.cuda.is_available() else "cpu"
demucs_model = get_model(DEFAULT_MODEL)

interfaces = []
for repo_id in repo_ids:
    # A single Hub listing per repo serves both the checkpoint and the
    # cluster-model lookups.
    repo_files = list_repo_files(repo_id)

    # Figure out the latest generator by taking the highest value one, unless
    # a checkpoint name was configured explicitly.
    if configured_ckpt_name is None:
        ckpt_name = _latest_generator_name(repo_files, repo_id)
    else:
        ckpt_name = configured_ckpt_name

    cluster_model_name = configured_cluster_model_name or "kmeans.pt"
    if cluster_model_name in repo_files:
        print(f"Found Cluster model - Downloading {cluster_model_name} from {repo_id}")
        cluster_model_path = hf_hub_download(repo_id, cluster_model_name)
    else:
        print(f"Could not find {cluster_model_name} in {repo_id}. Using None")
        cluster_model_path = None
    # Cluster inference is only meaningful when this repo ships a cluster model.
    default_cluster_infer_ratio = configured_cluster_infer_ratio if cluster_model_path else 0

    generator_path = hf_hub_download(repo_id, ckpt_name)
    config_path = hf_hub_download(repo_id, "config.json")
    hparams = HParams(**json.loads(Path(config_path).read_text()))
    speakers = list(hparams.spk.keys())
    model = Svc(
        net_g_path=generator_path,
        config_path=config_path,
        device=device,
        cluster_model_path=cluster_model_path,
    )

    # ... (same code as before to define the functions)
    # NOTE(review): the elided functions are not visible here. If `predict`
    # reads the module-level `model` (or other per-repo names) when it is
    # called rather than when it is defined, every tab will end up using the
    # values from the LAST repo - confirm and bind them per iteration if so.

    interface = gr.Interface(
        predict,
        inputs=[
            gr.Dropdown(speakers, label="🎤AI歌手选择🎶"),
            gr.Audio(type="filepath", source="microphone", label="请用麦克风上传您想转换的歌曲"),
            # ... (same inputs as before)
        ],
        outputs="audio",
        cache_examples=False,
        title=f"🌊💕🎶 - 滔滔AI+音乐:可从B站直接上传素材,无需分离背景音 ({repo_id})",
        description=description,
        article=article,
    )
    interfaces.append(interface)
# Expose every per-repo interface as its own tab, labelled "Model 1",
# "Model 2", ... in the order the interfaces were built.
tab_names = [f"Model {position}" for position, _ in enumerate(interfaces, start=1)]
interface = gr.TabbedInterface(interfaces, tab_names)

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch(show_error=True)
|