Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
@@ -27,18 +27,22 @@ translator_ppl = pipeline(
|
|
27 |
# model producing an image from text
|
28 |
image_ppl = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=YOUR_TOKEN)
|
29 |
|
30 |
-
def transcribe(microphone, file_upload):
|
|
|
31 |
warn_output = ""
|
32 |
-
if (microphone is not None) and (file_upload is not None):
|
|
|
33 |
warn_output = (
|
34 |
"WARNING: You've uploaded an audio file and used the microphone. "
|
35 |
"The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
|
36 |
)
|
37 |
|
38 |
-
elif (microphone is None) and (file_upload is None):
|
|
|
39 |
return "ERROR: You have to either use the microphone or upload an audio file"
|
40 |
|
41 |
-
file = microphone if microphone is not None else file_upload
|
|
|
42 |
|
43 |
text = speech_ppl(file)["text"]
|
44 |
print("Text: ", text)
|
@@ -48,7 +52,7 @@ def transcribe(microphone, file_upload):
|
|
48 |
print("Translate 2: ", translate)
|
49 |
print("Building image .....")
|
50 |
#image = image_ppl(translate).images[0]
|
51 |
-
image = image_ppl(translate)["sample"]
|
52 |
print("Image: ", image)
|
53 |
image.save("text-to-image.png")
|
54 |
|
@@ -86,14 +90,14 @@ mf_transcribe = gr.Interface(
|
|
86 |
fn=transcribe,
|
87 |
inputs=[
|
88 |
gr.inputs.Audio(source="microphone", type="filepath", optional=True),
|
89 |
-
gr.inputs.Audio(source="upload", type="filepath", optional=True),
|
90 |
],
|
91 |
outputs=[gr.Textbox(label="Transcribed text"),
|
92 |
gr.Textbox(label="Summarized text"),
|
93 |
gr.Image(type="pil", label="Output image")],
|
94 |
layout="horizontal",
|
95 |
theme="huggingface",
|
96 |
-
title="Whisper Demo: Transcribe Audio",
|
97 |
description=(
|
98 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned"
|
99 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
|
@@ -101,7 +105,7 @@ mf_transcribe = gr.Interface(
|
|
101 |
),
|
102 |
allow_flagging="never",
|
103 |
)
|
104 |
-
|
105 |
yt_transcribe = gr.Interface(
|
106 |
fn=yt_transcribe,
|
107 |
inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
|
@@ -116,8 +120,10 @@ yt_transcribe = gr.Interface(
|
|
116 |
),
|
117 |
allow_flagging="never",
|
118 |
)
|
|
|
119 |
|
120 |
with demo:
|
121 |
-
gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
|
|
|
122 |
|
123 |
demo.launch(enable_queue=True)
|
|
|
27 |
# model producing an image from text
|
28 |
image_ppl = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=YOUR_TOKEN)
|
29 |
|
30 |
+
#def transcribe(microphone, file_upload):
|
31 |
+
def transcribe(microphone):
|
32 |
warn_output = ""
|
33 |
+
# if (microphone is not None) and (file_upload is not None):
|
34 |
+
if (microphone is not None):
|
35 |
warn_output = (
|
36 |
"WARNING: You've uploaded an audio file and used the microphone. "
|
37 |
"The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
|
38 |
)
|
39 |
|
40 |
+
# elif (microphone is None) and (file_upload is None):
|
41 |
+
elif (microphone is None):
|
42 |
return "ERROR: You have to either use the microphone or upload an audio file"
|
43 |
|
44 |
+
# file = microphone if microphone is not None else file_upload
|
45 |
+
file = microphone
|
46 |
|
47 |
text = speech_ppl(file)["text"]
|
48 |
print("Text: ", text)
|
|
|
52 |
print("Translate 2: ", translate)
|
53 |
print("Building image .....")
|
54 |
#image = image_ppl(translate).images[0]
|
55 |
+
image = image_ppl(translate, num_inference_steps=15)["sample"]
|
56 |
print("Image: ", image)
|
57 |
image.save("text-to-image.png")
|
58 |
|
|
|
90 |
fn=transcribe,
|
91 |
inputs=[
|
92 |
gr.inputs.Audio(source="microphone", type="filepath", optional=True),
|
93 |
+
#gr.inputs.Audio(source="upload", type="filepath", optional=True),
|
94 |
],
|
95 |
outputs=[gr.Textbox(label="Transcribed text"),
|
96 |
gr.Textbox(label="Summarized text"),
|
97 |
gr.Image(type="pil", label="Output image")],
|
98 |
layout="horizontal",
|
99 |
theme="huggingface",
|
100 |
+
title="Whisper Demo: Transcribe Audio to Image",
|
101 |
description=(
|
102 |
"Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the the fine-tuned"
|
103 |
f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
|
|
|
105 |
),
|
106 |
allow_flagging="never",
|
107 |
)
|
108 |
+
'''
|
109 |
yt_transcribe = gr.Interface(
|
110 |
fn=yt_transcribe,
|
111 |
inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
|
|
|
120 |
),
|
121 |
allow_flagging="never",
|
122 |
)
|
123 |
+
'''
|
124 |
|
125 |
with demo:
|
126 |
+
#gr.TabbedInterface([mf_transcribe, yt_transcribe], ["Transcribe Audio", "Transcribe YouTube"])
|
127 |
+
# TabbedInterface takes two parallel lists (interfaces, tab names); passing a
# bare Interface and a bare string fails at startup, so wrap the single tab in lists.
gr.TabbedInterface([mf_transcribe], ["Transcribe Audio to Image"])
|
128 |
|
129 |
demo.launch(enable_queue=True)
|