import gradio as gr import os from constants import VOICE_METHODS, BARK_VOICES, EDGE_VOICES import platform from models.model import * from tts.conversion import COQUI_LANGUAGES import pytube import os import traceback from pydub import AudioSegment # from audio_enhance.functions import audio_enhance def convert_yt_to_wav(url): if not url: return "Primero introduce el enlace del video", None try: print(f"Convirtiendo video {url}...") # Descargar el video utilizando pytube video = pytube.YouTube(url) stream = video.streams.filter(only_audio=True).first() video_output_folder = os.path.join(f"yt_videos") # Ruta de destino de la carpeta audio_output_folder = 'audios' print("Downloading video") video_file_path = stream.download(output_path=video_output_folder) print(video_file_path) file_name = os.path.basename(video_file_path) audio_file_path = os.path.join(audio_output_folder, file_name.replace('.mp4','.wav')) # convert mp4 to wav print("Converting to wav") sound = AudioSegment.from_file(video_file_path,format="mp4") sound.export(audio_file_path, format="wav") if os.path.exists(video_file_path): os.remove(video_file_path) return "Success", audio_file_path except ConnectionResetError as cre: return "Se ha perdido la conexi贸n, recarga o reintentalo nuevamente m谩s tarde.", None except Exception as e: return str(e), None with gr.Blocks() as app: gr.HTML("

Simple RVC Inference - by Juuxn 馃捇

") gr.HTML("

El espacio actual usa solo cpu, as铆 que es solo para inferencia. Se recomienda duplicar el espacio para no tener problemas con las colas de procesamiento.

") gr.Markdown("[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/drive/1NKqqTR04HujeBxzwe7jbYEvNi8LbxD_N?usp=sharing)") gr.Markdown( "[![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm-dark.svg)](https://huggingface.co/spaces/juuxn/SimpleRVC?duplicate=true)\n\n" ) gr.Markdown("Recopilaci贸n de modelos que puedes usar: RVC + Kits ai. **[RVC Community Models](https://docs.google.com/spreadsheets/d/1owfUtQuLW9ReiIwg6U9UkkDmPOTkuNHf0OKQtWu1iaI)**") with gr.Tab("Inferencia"): model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo", show_label=True) with gr.Row(): with gr.Column(): audio_path = gr.Audio(label="Archivo de audio", show_label=True, type="filepath",) index_rate = gr.Slider(minimum=0, maximum=1, label="Search feature ratio:", value=0.75, interactive=True,) filter_radius1 = gr.Slider(minimum=0, maximum=7, label="Filtro (reducci贸n de asperezas respiraci贸n)", value=3, step=1, interactive=True,) with gr.Column(): f0_method = gr.Dropdown(choices=["harvest", "pm", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"], value="rmvpe", label="Algoritmo", show_label=True) vc_transform0 = gr.Slider(minimum=-12, label="N煤mero de semitonos, subir una octava: 12, bajar una octava: -12", value=0, maximum=12, step=1) protect0 = gr.Slider( minimum=0, maximum=0.5, label="Protejer las consonantes sordas y los sonidos respiratorios. 0.5 para desactivarlo.", value=0.33, step=0.01, interactive=True, ) resample_sr1 = gr.Slider( minimum=0, maximum=48000, label="Re-muestreo sobre el audio de salida hasta la frecuencia de muestreo final. 0 para no re-muestrear.", value=0, step=1, interactive=True, ) # Salida with gr.Row(): vc_output1 = gr.Textbox(label="Salida") vc_output2 = gr.Audio(label="Audio de salida") btn = gr.Button(value="Convertir") btn.click(infer, inputs=[model_url, f0_method, audio_path, index_rate, vc_transform0, protect0, resample_sr1, filter_radius1], outputs=[vc_output1, vc_output2]) with gr.TabItem("TTS"): with gr.Row(): tts_text = gr.Textbox( label="Texto:", placeholder="Texto que deseas convertir a voz...", lines=6, ) with gr.Column(): with gr.Row(): tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo RVC", show_label=True) with gr.Row(): tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="M茅todo TTS:", visible=True) tts_model = gr.Dropdown(choices=EDGE_VOICES, label="Modelo TTS:", visible=True, interactive=True) tts_api_key = gr.Textbox(label="ElevenLabs Api key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",interactive=True, visible=False) tts_coqui_languages = gr.Radio( label="Language", choices=COQUI_LANGUAGES, value="en", visible=False ) tts_btn = gr.Button(value="Convertir") with gr.Row(): tts_vc_output1 = gr.Textbox(label="Salida") tts_vc_output2 = gr.Audio(label="Audio de salida") tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key, tts_coqui_languages], outputs=[tts_vc_output1, tts_vc_output2]) tts_msg = gr.Markdown("""**Recomiendo que te crees una cuenta de eleven labs y pongas tu clave de api, es gratis y tienes 10k caracteres de limite al mes.**
![Imgur](https://imgur.com/HH6YTu0.png) """, visible=False) tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages]) with gr.TabItem("Youtube"): gr.Markdown("## Convertir video de Youtube a audio") with gr.Row(): yt_url = gr.Textbox( label="Url del video:", placeholder="https://www.youtube.com/watch?v=3vEiqil5d3Q" ) yt_btn = gr.Button(value="Convertir") with gr.Row(): yt_output1 = gr.Textbox(label="Salida") yt_output2 = gr.Audio(label="Audio de salida") yt_btn.click(fn=convert_yt_to_wav, inputs=[yt_url], outputs=[yt_output1, yt_output2]) # with gr.TabItem("Mejora de audio"): # enhance_input_audio = gr.Audio(label="Audio de entrada") # enhance_output_audio = gr.Audio(label="Audio de salida") # btn_enhance_audio = gr.Button() # # btn_enhance_audio.click(fn=audio_enhance, inputs=[enhance_input_audio], outputs=[enhance_output_audio]) with gr.Tab("Modelos"): gr.HTML("

Buscar modelos

") search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True) # Salida with gr.Row(): sarch_output = gr.Markdown(label="Salida") btn_search_model = gr.Button(value="Buscar") btn_search_model.click(fn=search_model, inputs=[search_name], outputs=[sarch_output]) gr.HTML("

Publica tu modelo

") post_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True) post_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo", show_label=True) post_creator = gr.Textbox(placeholder="ID de discord o enlace al perfil del creador", label="Creador", show_label=True) post_version = gr.Dropdown(choices=["RVC v1", "RVC v2"], value="RVC v1", label="Versi贸n", show_label=True) # Salida with gr.Row(): post_output = gr.Markdown(label="Salida") btn_post_model = gr.Button(value="Publicar") btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output]) # with gr.Column(): # model_voice_path07 = gr.Dropdown( # label=i18n("RVC Model:"), # choices=sorted(names), # value=default_weight, # ) # best_match_index_path1, _ = match_index( # model_voice_path07.value # ) # file_index2_07 = gr.Dropdown( # label=i18n("Select the .index file:"), # choices=get_indexes(), # value=best_match_index_path1, # interactive=True, # allow_custom_value=True, # ) # with gr.Row(): # refresh_button_ = gr.Button(i18n("Refresh"), variant="primary") # refresh_button_.click( # fn=change_choices2, # inputs=[], # outputs=[model_voice_path07, file_index2_07], # ) # with gr.Row(): # original_ttsvoice = gr.Audio(label=i18n("Audio TTS:")) # ttsvoice = gr.Audio(label=i18n("Audio RVC:")) # with gr.Row(): # button_test = gr.Button(i18n("Convert"), variant="primary") # button_test.click( # tts.use_tts, # inputs=[ # text_test, # tts_test, # model_voice_path07, # file_index2_07, # # transpose_test, # vc_transform0, # f0method8, # index_rate1, # crepe_hop_length, # f0_autotune, # ttsmethod_test, # ], # outputs=[ttsvoice, original_ttsvoice], # ) app.queue(concurrency_count=200, max_size=1022).launch() #share=True