Spaces:

PolyAI
/

pheme

Runtime error

App Files Files Community

pheme / app.py

pfb30

Update app.py

e946294 verified 10 months ago

raw

history blame contribute delete

1.98 kB

	"""Simple demo app.

	Copyright PolyAI Limited.
	"""
	import time
	from pathlib import Path

	import gradio as gr

	from transformer_infer import PhemeClient, parse_arguments


	# Voice identifiers offered in the "Select voice:" dropdown built in main().
	# The selected entry is handed unchanged to model.infer() by inference().
	VOICE_OPTIONS = [
	"male_voice",
	"POD1000000004_S0000246",
	"POD1000000018_S0000253",
	"POD1000000048_S0000035",
	"YOU1000000006_S0000051",
	"YOU1000000044_S0000798",
	]

	# Configuration object returned by transformer_infer.parse_arguments().
	# Evaluated at import time (presumably parses command-line options --
	# TODO confirm against transformer_infer).
	args = parse_arguments()

	# Single PhemeClient built once at import time from `args`; every call to
	# inference() goes through this shared instance.
	model = PhemeClient(args)


	def inference(
	    text,
	    voice,
	    top_k,
	    temperature
	):
	    """Synthesize speech for ``text`` with the selected ``voice``.

	    Appends the request to ``PhemeVoice.log``, runs the module-level
	    ``model`` and prints the elapsed inference time to stdout.

	    This is a generator function: nothing runs until it is iterated,
	    and it produces exactly one item.

	    Args:
	        text: Text to synthesize.
	        voice: Voice identifier, forwarded to ``model.infer``.
	        top_k: Forwarded to ``model.infer`` as ``top_k``.
	        temperature: Forwarded to ``model.infer`` as ``temperature``.

	    Yields:
	        A ``(samplerate, data)`` tuple where ``samplerate`` is 16000 and
	        ``data`` is the value returned by ``model.infer``.
	    """
	    # The text comes from the user and may contain non-ASCII characters.
	    # Pin the encoding so logging does not depend on the host locale.
	    with open("PhemeVoice.log", "a", encoding="utf-8") as f:
	        f.write(f"{voice}: {text} \n")

	    # perf_counter() is monotonic, so the measured duration is not skewed
	    # by system clock adjustments the way time.time() can be.
	    start_time = time.perf_counter()
	    data = model.infer(
	        text, voice, top_k=top_k, temperature=temperature)
	    print("Time taken: ", time.perf_counter() - start_time)

	    samplerate = 16_000
	    yield (samplerate, data)


	def main():
	    """Assemble the Gradio demo for Pheme and launch it.

	    Creates the text, voice, temperature and top-k input widgets plus an
	    autoplaying audio output, wires them to ``inference`` through a
	    ``gr.Interface``, enables the queue and launches with ``share=True``.
	    """
	    description = """Pheme Model can generate a variety of conversational voices in 16 kHz for phone-call applications.

	Paper: https://arxiv.org/pdf/2401.02839.pdf
	Github: https://github.com/PolyAI-LDN/pheme

	Voices are generated in a zero-shot manner, the model has never seen them before.
	"""

	    # Input widgets.
	    text_box = gr.Textbox(
	        lines=3,
	        value="I gotta say, I never expect that to happened. Um I had some expectations but you know.",
	        label="Text",
	    )
	    voice_picker = gr.Dropdown(
	        VOICE_OPTIONS,
	        value="POD1000000048_S0000035",
	        label="Select voice:",
	        type="value",
	    )
	    temperature_slider = gr.Slider(
	        minimum=0.3, maximum=1.5, value=0.7, step=0.05
	    )
	    top_k_slider = gr.Slider(minimum=10, maximum=250, value=210)

	    # Output widget: plays the generated audio as soon as it arrives.
	    audio_out = gr.Audio(label="audio:", autoplay=True)

	    # Listed in the same order as the parameters of inference():
	    # text, voice, top_k, temperature.
	    input_widgets = [
	        text_box,
	        voice_picker,
	        top_k_slider,
	        temperature_slider,
	    ]

	    demo = gr.Interface(
	        fn=inference,
	        inputs=input_widgets,
	        title="Pheme",
	        description=description,
	        outputs=[audio_out],
	    )
	    queued_demo = demo.queue()
	    queued_demo.launch(share=True)


	# Script entry point: start the demo only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()