File size: 1,689 Bytes
982d2bc
 
 
bafac00
982d2bc
 
bafac00
982d2bc
3bb6fbc
d3762a8
bafac00
982d2bc
d0cc855
 
1fb4ca2
982d2bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import base64
import os
from io import BytesIO

import streamlit as st
from langchain.llms import OpenAI
from langchain.prompts import ChatPromptTemplate
from PIL import Image

# Read the API key from the "k3" environment variable and re-export it as
# OPENAI_API_KEY. os.environ only accepts strings, so assigning the None that
# os.getenv returns for a missing variable would raise an opaque TypeError;
# fail early with an explicit message instead.
_api_key = os.getenv("k3")
if not _api_key:
    raise RuntimeError(
        'Environment variable "k3" is not set; it must contain the OpenAI API key.'
    )
os.environ["OPENAI_API_KEY"] = _api_key

# Initialize OpenAI with LangChain.
# temperature=0 requests the least random completions.
# NOTE(review): "cyberagent/llava-calm2-siglip" does not look like an OpenAI
# model name -- confirm the endpoint actually serves it. An earlier
# configuration used model="gpt-4-vision-preview".
llm = OpenAI(api_key=os.environ["OPENAI_API_KEY"], model="cyberagent/llava-calm2-siglip", temperature=0)

# Prompt template for image captioning. The {image_base64} placeholder is
# filled with the base64-encoded image bytes when the prompt is formatted.
caption_template = """
You are an expert image captioner. Given an image in base64 format, provide a descriptive caption for the image.

Image (base64): {image_base64}

Caption:
"""

# Wrap the template as a single system message.
prompt = ChatPromptTemplate.from_messages([("system", caption_template)])

def generate_caption(image):
    """Generate a caption for an image using the module-level LLM.

    The image is re-encoded as JPEG, base64-encoded, substituted into the
    module-level ``prompt`` template, and sent to the module-level ``llm``.

    Args:
        image: A ``PIL.Image.Image`` instance (any mode).

    Returns:
        The caption text returned by the model, with surrounding whitespace
        stripped.
    """
    # JPEG cannot store alpha or palette data; Pillow raises OSError when
    # asked to save e.g. an RGBA PNG as JPEG, so normalise to RGB first.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Convert the image to base64
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")

    # Create the prompt
    formatted_prompt = prompt.format(image_base64=img_base64)

    # Generate the caption using OpenAI
    response = llm(formatted_prompt)

    # LangChain LLM wrappers return the completion as a plain string, which
    # has no ``.choices`` attribute; return it directly in that case.
    if isinstance(response, str):
        return response.strip()
    # Fallback for a raw completion-style object exposing ``choices``.
    return response.choices[0].text.strip()

# ---- Streamlit page ----
st.title("Image Captioning Application :robot_face:")

# Upload widget restricted to JPEG and PNG files.
uploaded_file = st.file_uploader(
    "Choose an image...",
    type=["jpg", "jpeg", "png"],
)

# Everything below is rendered only once a file has been uploaded.
if uploaded_file is not None:
    # Open the upload with Pillow and show a preview.
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image.", use_column_width=True)

    # The caption is generated only on an explicit button press.
    generate_clicked = st.button("Generate Caption")
    if generate_clicked:
        # Show a spinner while the caption request is in flight.
        with st.spinner("Generating caption..."):
            caption = generate_caption(image)
            st.write("Caption: ")
            st.write(caption)