import base64
import os
from io import BytesIO

import streamlit as st
from PIL import Image
from langchain.llms import OpenAI
from langchain.prompts import ChatPromptTemplate

# Read the OpenAI API key from the "k3" environment variable
os.environ["OPENAI_API_KEY"] = os.getenv("k3")

# Initialize the LLM through LangChain
llm = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    model="cyberagent/llava-calm2-siglip",
    temperature=0,
)

# Prompt template for image captioning
caption_template = """
You are an expert image captioner. Given an image in base64 format,
provide a descriptive caption for the image.

Image (base64): {image_base64}

Caption:
"""

prompt = ChatPromptTemplate.from_messages([("system", caption_template)])


def generate_caption(image):
    # Convert the PIL image to a base64-encoded JPEG string
    # (convert to RGB first so PNGs with an alpha channel can be saved as JPEG)
    buffered = BytesIO()
    image.convert("RGB").save(buffered, format="JPEG")
    img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

    # Fill the prompt template with the encoded image
    formatted_prompt = prompt.format(image_base64=img_base64)

    # Generate the caption; the LangChain LLM wrapper returns a plain string,
    # not an OpenAI response object, so no .choices indexing is needed
    response = llm(formatted_prompt)
    return response.strip()


# Streamlit UI
st.title("Image Captioning Application :robot_face:")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image.", use_column_width=True)

    if st.button("Generate Caption"):
        with st.spinner("Generating caption..."):
            caption = generate_caption(image)
        st.write("Caption: ")
        st.write(caption)
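
To try the app, save the script to a file (the name `app.py` below is just an example), make sure the `k3` environment variable holds your API key, and start the Streamlit server from the same directory:

    streamlit run app.py

Streamlit will open the uploader page in your browser; once an image is uploaded, the "Generate Caption" button sends it through `generate_caption` and displays the returned text.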