Llama-Vision-Together

Runtime error

App Files Community

akhaliq HF staff committed on Sep 25

Commit

cb0fdde

•

1 Parent(s): 7bc7ddc

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -46

app.py CHANGED Viewed

@@ -8,65 +8,78 @@ import base64
 # Initialize the Together AI client
 client = Together(api_key=os.environ.get('TOGETHER_API_KEY'))
-def encode_image(image):
-    buffered = io.BytesIO()
-    image.save(buffered, format="PNG")
-    return base64.b64encode(buffered.getvalue()).decode('utf-8')
 def chat_with_image(message, image, history):
     # Prepare the messages
     messages = [{"role": "system", "content": "You are a helpful assistant that can analyze images and text."}]
     for human, assistant in history:
-        if image is not None and human.startswith("Image uploaded: "):
             # This is an image message
-            encoded_image = encode_image(Image.open(image))
             messages.append({
                 "role": "user",
                 "content": [
                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
-                    {"type": "text", "text": message}
                 ]
             })
         else:
-            # This is a text-only message
-            messages.append({"role": "user", "content": human})
-        messages.append({"role": "assistant", "content": assistant})
-    # Add the current message
-    if image is not None:
-        encoded_image = encode_image(Image.open(image))
-        messages.append({
-            "role": "user",
-            "content": [
-                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
-                {"type": "text", "text": message}
-            ]
-        })
     else:
         messages.append({"role": "user", "content": message})
     # Call the Together AI API
-    response = client.chat.completions.create(
-        model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-        messages=messages,
-        max_tokens=512,
-        temperature=0.7,
-        top_p=0.7,
-        top_k=50,
-        repetition_penalty=1,
-        stop=["<|eot_id|>", "<|eom_id|>"],
-        stream=True
-    )
-    # Accumulate the response
-    full_response = ""
-    for chunk in response:
-        if chunk.choices[0].delta.content is not None:
-            full_response += chunk.choices[0].delta.content
-            yield full_response
-    return full_response
 # Create the Gradio interface
 with gr.Blocks() as demo:
@@ -76,17 +89,17 @@ with gr.Blocks() as demo:
     clear = gr.Button("Clear")
     def user(user_message, image, history):
-        if image is not None:
-            return "", None, history + [["Image uploaded: " + user_message, None]]
         else:
             return "", None, history + [[user_message, None]]
     def bot(history):
         user_message = history[-1][0]
         image = None
-        if user_message.startswith("Image uploaded: "):
-            image = history[-2][0].split(": ", 1)[1]  # Get the image path from the previous message
-            user_message = user_message.split(": ", 1)[1]  # Get the actual message
         bot_message = chat_with_image(user_message, image, history[:-1])
         history[-1][1] = ""

 # Initialize the Together AI client
 client = Together(api_key=os.environ.get('TOGETHER_API_KEY'))
+def encode_image(image_path):
+    try:
+        with Image.open(image_path) as img:
+            buffered = io.BytesIO()
+            img.save(buffered, format="PNG")
+            return base64.b64encode(buffered.getvalue()).decode('utf-8')
+    except Exception as e:
+        print(f"Error encoding image: {e}")
+        return None
 def chat_with_image(message, image, history):
     # Prepare the messages
     messages = [{"role": "system", "content": "You are a helpful assistant that can analyze images and text."}]
     for human, assistant in history:
+        if human.startswith("Image: "):
             # This is an image message
+            image_path = human.split(": ", 1)[1]
+            encoded_image = encode_image(image_path)
+            if encoded_image:
+                messages.append({
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
+                        {"type": "text", "text": "What's in this image?"}
+                    ]
+                })
+            else:
+                messages.append({"role": "user", "content": "I tried to upload an image, but there was an error."})
+        else:
+            # This is a text-only message
+            messages.append({"role": "user", "content": human})
+        messages.append({"role": "assistant", "content": assistant})
+    # Add the current message
+    if image:
+        encoded_image = encode_image(image)
+        if encoded_image:
             messages.append({
                 "role": "user",
                 "content": [
                     {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{encoded_image}"}},
+                    {"type": "text", "text": message or "What's in this image?"}
                 ]
             })
         else:
+            messages.append({"role": "user", "content": "I tried to upload an image, but there was an error."})
     else:
         messages.append({"role": "user", "content": message})
     # Call the Together AI API
+    try:
+        response = client.chat.completions.create(
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+            messages=messages,
+            max_tokens=512,
+            temperature=0.7,
+            top_p=0.7,
+            top_k=50,
+            repetition_penalty=1,
+            stop=["<|eot_id|>", "<|eom_id|>"],
+            stream=True
+        )
+        # Accumulate the response
+        full_response = ""
+        for chunk in response:
+            if chunk.choices[0].delta.content is not None:
+                full_response += chunk.choices[0].delta.content
+                yield full_response
+    except Exception as e:
+        yield f"An error occurred: {str(e)}"
 # Create the Gradio interface
 with gr.Blocks() as demo:
     clear = gr.Button("Clear")
     def user(user_message, image, history):
+        if image:
+            return "", None, history + [[f"Image: {image}", None]]
         else:
             return "", None, history + [[user_message, None]]
     def bot(history):
         user_message = history[-1][0]
         image = None
+        if user_message.startswith("Image: "):
+            image = user_message.split(": ", 1)[1]
+            user_message = "What's in this image?"
         bot_message = chat_with_image(user_message, image, history[:-1])
         history[-1][1] = ""