from transformers import pipeline
from langchain import PromptTemplate, LLMChain, OpenAI
import requests
import os
import streamlit as st
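
# Streamlit app: upload an image, caption it with BLIP, expand the caption into a
# description with an OpenAI LLM via LangChain, then synthesise the description to
# audio with a Hugging Face text-to-speech model.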
HF_API_KEY = st.secrets["HF_API_KEY"]
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
openai_instance = OpenAI(openai_api_key=OPENAI_API_KEY)

# img2text: caption the image with BLIP
def img2text(url):
    image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
    text = image_to_text_model(url)[0]["generated_text"]
    print(text)
    return text

# Describe the caption using an LLM
def generate_description(caption):
    template = """
    You are a narrator;
    Write a suitable image description of an image captioned as mentioned in the Context. Up to 5 bullet points, including a few historic facts about the image and how the image can be described to a visually impaired user;
    CONTEXT: {caption};
    """
    prompt = PromptTemplate(template=template, input_variables=["caption"])
    desc_llm = LLMChain(llm=openai_instance, prompt=prompt, verbose=True)
    description = desc_llm.predict(caption=caption).replace('"', '')
    print(description)
    return description

# text to speech via the Hugging Face Inference API
def text2speech(message):
    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    headers = {"Authorization": f"Bearer {HF_API_KEY}"}
    payload = {
        "inputs": message
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    # The API returns raw audio bytes, which are written to a local file
    with open('audio.flac', 'wb') as file:
        file.write(response.content)

def main():
    st.set_page_config(page_title="image-to-caption-to-summary", page_icon="😊")
    st.header("Image to caption to summary")
    uploaded_file = st.file_uploader("Choose an image", type=['png', 'jpg'])
    if uploaded_file is not None:
        print(uploaded_file)
        # Save the upload locally so the captioning pipeline can read it by path
        bytes_data = uploaded_file.getvalue()
        with open(uploaded_file.name, "wb") as file:
            file.write(bytes_data)
        st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
        st.text('Processing img2text...')
        caption = img2text(uploaded_file.name)
        with st.expander("Caption"):
            st.write(caption)
        st.text('Generating description of given image...')
        description = generate_description(caption)
        with st.expander("Description"):
            st.write(description)
        st.text('Processing text2speech...')
        text2speech(description)
        st.audio("audio.flac")

if __name__ == '__main__':
    main()