Spaces:

colerobertson
/

child_speech

Sleeping

child_speech / app.py

Upload folder using huggingface_hub

5b1f241 verified 9 months ago

766 Bytes

	# Zero-shot classification of a recording as adult or child speech.
	from transformers import pipeline
	# Shared zero-shot audio classifier, created once when this module is loaded.
	pipe = pipeline(
	    task="zero-shot-audio-classification",
	    model="laion/clap-htsat-unfused",
	)

	import numpy as np
	import gradio as gr


	def get_labels(target):
	    """Return the candidate labels the classifier scores a recording against.

	    Args:
	        target: Currently unused; the same two labels are returned
	            whatever value is passed.

	    Returns:
	        A new two-item list of label strings.
	    """
	    # Plain string literals: the former f-strings had no placeholders.
	    return ["An adult speaking.", "A child speaking."]

	def classify_audio(audio, target=None):
	    """Score a recording against the labels returned by ``get_labels``.

	    Args:
	        audio: A ``(sample_rate, samples)`` pair where ``samples`` is a
	            numpy array, or ``None`` when nothing was recorded.
	        target: Forwarded unchanged to ``get_labels``.

	    Returns:
	        A dict mapping each label to its score. An empty dict is returned
	        when there is no audio to classify.
	    """
	    # Nothing was submitted: avoid a TypeError on tuple unpacking.
	    if audio is None:
	        return {}

	    # NOTE(review): the sample rate is never passed on to the pipeline, so
	    # the model presumably assumes its own native rate -- confirm, and
	    # resample here if the recording rate can differ.
	    _sample_rate, y = audio
	    y = y.astype(np.float32)

	    # A zero-length recording would make np.max raise ValueError.
	    if y.size == 0:
	        return {}

	    # Down-mix to a single channel before calling the pipeline.
	    # Assumes multi-channel data is laid out as (samples, channels) -- confirm.
	    if y.ndim > 1:
	        y = y.mean(axis=1)

	    # Peak-normalise, skipping silent input so we never divide by zero
	    # (which would fill the signal with NaNs).
	    peak = np.max(np.abs(y))
	    if peak > 0:
	        y /= peak

	    candidate_labels = get_labels(target)
	    preds = pipe(y, candidate_labels=candidate_labels)
	    return {p["label"]: p["score"] for p in preds}


	# Web UI: a microphone recording goes in, label confidences come out.
	# gr.Label() replaces the deprecated gr.outputs.Label() legacy alias.
	# NOTE(review): `source=` is the gradio 3.x spelling of this argument;
	# confirm the pinned gradio version before upgrading.
	demo = gr.Interface(
	    fn=classify_audio,
	    inputs=[gr.Audio(source="microphone")],
	    outputs=gr.Label(),
	)
	demo.launch(debug=False)