import streamlit as st
from PIL import Image
import base64
import os
from io import BytesIO

from langchain.llms import OpenAI
from langchain.prompts import ChatPromptTemplate

# Read the OpenAI API key from the "k3" environment variable
os.environ["OPENAI_API_KEY"] = os.getenv("k3")

# Initialize the LLM through LangChain
llm = OpenAI(api_key=os.environ["OPENAI_API_KEY"], model="cyberagent/llava-calm2-siglip", temperature=0)
# llm = OpenAI(api_key=os.environ["OPENAI_API_KEY"], model="gpt-4-vision-preview", temperature=0)

# Prompt template for image captioning
caption_template = """
You are an expert image captioner. Given an image in base64 format, provide a descriptive caption for the image.
Image (base64): {image_base64}
Caption:
"""

prompt = ChatPromptTemplate.from_messages([("system", caption_template)])


def generate_caption(image):
    # Convert the image to a base64-encoded JPEG
    # (PNG uploads may carry an alpha channel, so convert to RGB first)
    if image.mode != "RGB":
        image = image.convert("RGB")
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

    # Fill the prompt template with the encoded image
    formatted_prompt = prompt.format(image_base64=img_base64)

    # Generate the caption; the LangChain LLM call returns a plain string
    response = llm(formatted_prompt)
    return response.strip()


st.title("Image Captioning Application :robot_face:")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image.", use_column_width=True)

    if st.button("Generate Caption"):
        with st.spinner("Generating caption..."):
            caption = generate_caption(image)
        st.write("Caption:")
        st.write(caption)
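
# Usage note (a sketch, assuming this script is saved as app.py and that the
# "k3" environment variable holds a valid API key; both names are taken from
# this script, the key value below is a placeholder):
#
#   export k3="sk-..."
#   streamlit run app.py
#
# Streamlit then serves the app locally (by default at http://localhost:8501),
# where an image can be uploaded and a caption generated via the button.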