Spaces:

santoshNA
/

Image_caption_description

Sleeping

App Files Files Community

Image_caption_description / app.py

santoshNA

Update app.py

5b0eee7 10 months ago

raw

history blame contribute delete

No virus

2.51 kB

	from transformers import pipeline
	from langchain import PromptTemplate, LLMChain, OpenAI
	import requests
	import os
	import streamlit as st


	# Credentials are read from Streamlit's secrets store at import time.
	HF_API_KEY = st.secrets["HF_API_KEY"]
	OpenAI_API_Key = st.secrets["OPENAI_API_KEY"]

	# Shared LLM client; generate_description() passes it to its LLMChain.
	openai_instance = OpenAI(api_key=OpenAI_API_Key)

	# img2text
	@st.cache_resource
	def _load_captioning_model():
	    """Build the BLIP image-to-text pipeline once and reuse it afterwards."""
	    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")


	def img2text(url):
	    """Return the caption generated for the image located at *url*.

	    Args:
	        url: Image location handed straight to the image-to-text pipeline
	            (main() passes the path of the file it saved locally).

	    Returns:
	        The ``generated_text`` field of the pipeline's first result.
	    """
	    # Streamlit re-executes the whole script on every interaction, so the
	    # model is fetched through a cached loader instead of being rebuilt
	    # on each call.
	    image_to_text_model = _load_captioning_model()
	    text = image_to_text_model(url)[0]["generated_text"]

	    print(text)
	    return text


	# Describe it using LLM
	def generate_description(caption):
	    """Ask the LLM for a bullet-point description of a captioned image.

	    Args:
	        caption: Caption text substituted into the prompt's CONTEXT slot.

	    Returns:
	        The model's reply with every double-quote character removed.
	    """
	    narrator_template = """
	You are a narrator;
	Write a suitable image description of an image captioned as mentioned in Context. Upto 5 bullet points including few historic facts about the image and how the image can be described to a visually impaired user;
	CONTEXT: {caption};
	"""

	    chain = LLMChain(
	        llm=openai_instance,
	        prompt=PromptTemplate(template=narrator_template, input_variables=["caption"]),
	        verbose=True,
	    )

	    raw_reply = chain.predict(caption=caption)
	    # Double quotes are stripped from the reply before it is returned.
	    description = raw_reply.replace('"', '')

	    print(description)
	    return description



	# text to speech
	def text2speech(message, timeout=60):
	    """Synthesize *message* via the Hugging Face Inference API into 'audio.flac'.

	    Args:
	        message: Text sent as the ``inputs`` field of the request payload.
	        timeout: Seconds to wait for the HTTP request before giving up.

	    Raises:
	        requests.HTTPError: If the API answers with a 4xx/5xx status.
	        requests.Timeout: If the request exceeds *timeout*.
	    """
	    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
	    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
	    payload = {"inputs": message}

	    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	    # Fail loudly on an error response; otherwise the error body would be
	    # written to disk and later played back as if it were audio.
	    response.raise_for_status()

	    with open('audio.flac', 'wb') as file:
	        file.write(response.content)


	def main():
	    """Run the Streamlit page: upload an image, caption it, describe it, speak it.

	    The uploaded image is saved to a temporary file for captioning, the
	    caption is expanded into a description by the LLM, and the description
	    is converted to speech and played from 'audio.flac'.
	    """
	    import tempfile  # local import keeps this block self-contained

	    st.set_page_config(page_title="image-to-caption-to-summary", page_icon="😊")
	    st.header("Image to caption to summary")
	    uploaded_file = st.file_uploader("Choose an image", type=['png', 'jpg'])

	    if uploaded_file is None:
	        return

	    print(uploaded_file)

	    # Save the upload under a generated temp name rather than the
	    # client-supplied filename, so it cannot overwrite files in the
	    # working directory; only the extension is carried over.
	    suffix = os.path.splitext(uploaded_file.name)[1]
	    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
	        tmp.write(uploaded_file.getvalue())
	        image_path = tmp.name

	    try:
	        st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

	        st.text('Processing img2text...')
	        caption = img2text(image_path)
	    finally:
	        # The saved copy is only needed for captioning.
	        os.remove(image_path)

	    with st.expander("caption"):
	        st.write(caption)

	    st.text('Generating description of given image...')
	    description = generate_description(caption)
	    with st.expander("Description"):
	        st.write(description)

	    st.text('Processing text2speech...')
	    text2speech(description)
	    st.audio("audio.flac")

	# Script entry point: build the page only when executed directly.
	if __name__ == "__main__":
	    main()