# Spaces: Sleeping (hosting-page status banner captured with the source; not part of the program)
# Test correct replication of speaker phonemes.
import gradio as gr
import numpy as np
from transformers import pipeline

# Zero-shot audio classifier built from the "laion/clap-htsat-unfused"
# checkpoint. It is created once at module level and reused by every call
# that reads the global ``pipe``.
pipe = pipeline(
    task="zero-shot-audio-classification",
    model="laion/clap-htsat-unfused",
)
def get_labels(target):
    """Return the candidate captions used for zero-shot classification.

    Args:
        target: Currently unused; the same two captions are returned for
            every value. Kept so existing callers do not break.

    Returns:
        A fresh two-element list: the "adult" caption followed by the
        "child" caption.
    """
    # Plain literals: the original f-strings had no placeholders.
    return ["An adult speaking.", "A child speaking."]
def classify_audio(audio, target=None):
    """Score a recording against the candidate speaker captions.

    Args:
        audio: A ``(sample_rate, samples)`` pair, or ``None`` when nothing
            was recorded. ``samples`` is converted to a float32 NumPy array.
        target: Forwarded unchanged to ``get_labels``.

    Returns:
        A dict mapping each candidate label to the score reported by the
        classification pipeline. Empty when there is no audio to classify.
    """
    # Nothing recorded: return an empty result instead of failing on unpack.
    if audio is None:
        return {}

    # NOTE(review): the sample rate is discarded and the raw samples are
    # handed to the pipeline as-is. Presumably the pipeline assumes its own
    # expected rate; confirm whether resampling is needed.
    _sample_rate, samples = audio
    samples = np.asarray(samples, dtype=np.float32)
    if samples.size == 0:
        return {}

    # Assumes multi-channel input is shaped (samples, channels); mix it down
    # to one channel before classification. TODO confirm against the UI.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Peak-normalise, skipping all-zero (silent) input so the division
    # cannot produce NaN/inf values.
    peak = np.max(np.abs(samples))
    if peak > 0:
        samples = samples / peak

    candidate_labels = get_labels(target)
    preds = pipe(samples, candidate_labels=candidate_labels)
    return {pred["label"]: pred["score"] for pred in preds}
# Web UI: one audio input wired to ``classify_audio``, with the returned
# label -> score dict shown in a Label component.
# ``gr.Label()`` replaces the deprecated ``gr.outputs.Label()`` alias.
# NOTE(review): ``source="microphone"` is the Gradio 3.x keyword; newer
# releases spell it ``sources=["microphone"]``. Confirm the pinned version.
demo = gr.Interface(
    fn=classify_audio,
    inputs=[gr.Audio(source="microphone")],
    outputs=gr.Label(),
)
demo.launch(debug=False)