Spaces:

AffordableAI
/

Construction_Site_Safety_Analyzer_Llama_3.2_Vision

Running

App Files Community

capradeepgujaran committed on Oct 18

Commit

10b5661

•

1 Parent(s): 2768978

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -33

app.py CHANGED Viewed

@@ -1,50 +1,95 @@
 import gradio as gr
 from PIL import Image
-import torch
-from transformers import AutoProcessor, LlamaForCausalLM, LlamaTokenizer
-# Load the Llama 2 model and processor
-# Note: You'll need to replace these with the actual Llama 3.2 vision model when it becomes available
-model_name = "meta-llama/Llama-2-7b-chat-hf"
-processor = AutoProcessor.from_pretrained(model_name)
-model = LlamaForCausalLM.from_pretrained(model_name)
-tokenizer = LlamaTokenizer.from_pretrained(model_name)
 def analyze_construction_image(image):
-    # Process the image
-    inputs = processor(images=image, return_tensors="pt")
-    # Generate text based on the image
-    prompt = "Analyze this construction image and identify the snag category, snag description, and steps to desnag."
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
-    # Concatenate the image embeddings with the text input
-    combined_inputs = torch.cat([inputs.pixel_values, input_ids], dim=1)
-    # Generate output
-    outputs = model.generate(combined_inputs, max_length=300)
-    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Parse the result (this is a simplified example)
-    lines = result.split('\n')
-    snag_category = lines[0] if len(lines) > 0 else "N/A"
-    snag_description = lines[1] if len(lines) > 1 else "N/A"
-    desnag_steps = lines[2:] if len(lines) > 2 else ["N/A"]
-    return snag_category, snag_description, "\n".join(desnag_steps)
 # Create the Gradio interface
 iface = gr.Interface(
     fn=analyze_construction_image,
-    inputs=gr.Image(type="pil"),
     outputs=[
         gr.Textbox(label="Snag Category"),
         gr.Textbox(label="Snag Description"),
         gr.Textbox(label="Steps to Desnag")
     ],
-    title="Construction Image Analyzer",
-    description="Upload a construction site image to identify issues and get desnag steps."
 )
 # Launch the app
-iface.launch()

+import os
+import base64
+import requests
 import gradio as gr
 from PIL import Image
+import io
+# API key is read from the environment; it is None when the variable is unset.
+GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+# Groq exposes its OpenAI-compatible chat endpoint under the /openai/v1 prefix;
+# the path without "/openai" is not a valid route.
+GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
+def encode_image(image_path):
+    """Return the bytes of the file at *image_path* as a base64 text string."""
+    with open(image_path, "rb") as handle:
+        raw_bytes = handle.read()
+    encoded = base64.b64encode(raw_bytes)
+    return encoded.decode('utf-8')
 def analyze_construction_image(image):
+    """Send a construction-site image to the Groq chat API and split the reply.
+
+    Args:
+        image: PIL image supplied by the Gradio input, or None when nothing
+            was uploaded.
+
+    Returns:
+        A 3-tuple of strings: (snag category, snag description, desnag
+        steps). On any failure the first element is an "Error: ..." message
+        and the other two are empty strings, so the UI always gets three
+        values.
+    """
+    if image is None:
+        return "Error: No image uploaded", "", ""
+    if not GROQ_API_KEY:
+        # Fail fast with a readable message instead of sending "Bearer None".
+        return "Error: GROQ_API_KEY environment variable is not set", "", ""
+    try:
+        # Convert PIL Image to a base64-encoded PNG
+        buffered = io.BytesIO()
+        image.save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
+        # Prepare the message for Groq API
+        # NOTE(review): Groq's vision models have reportedly rejected requests
+        # that combine a system message with an image - confirm against the
+        # current docs; if so, fold this instruction into the user text.
+        messages = [
+            {
+                "role": "system",
+                "content": "You are an AI assistant specialized in analyzing construction site images. Identify issues, categorize them, and provide steps to resolve them."
+            },
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "Analyze this construction image. Identify the snag category, provide a detailed snag description, and list steps to desnag."
+                    },
+                    {
+                        "type": "image_url",
+                        # The OpenAI-compatible schema expects an object with
+                        # a "url" key here, not a bare string.
+                        "image_url": {"url": f"data:image/png;base64,{img_str}"}
+                    }
+                ]
+            }
+        ]
+        # Make API request to Groq
+        headers = {
+            "Authorization": f"Bearer {GROQ_API_KEY}",
+            "Content-Type": "application/json"
+        }
+        data = {
+            # NOTE(review): preview model IDs get retired - verify this one
+            # against Groq's current model list.
+            "model": "llama-3.2-90b-vision-preview",
+            "messages": messages,
+            "max_tokens": 300,
+            "temperature": 0.7
+        }
+        # A timeout keeps the UI from hanging forever on a stalled connection.
+        response = requests.post(GROQ_API_URL, headers=headers, json=data, timeout=60)
+        response.raise_for_status()
+        result = response.json()["choices"][0]["message"]["content"] or ""
+        # Parse the result. Blank lines are dropped first so that the
+        # positional split (line 1 = category, line 2 = description, rest =
+        # steps) is not thrown off by empty lines between paragraphs.
+        lines = [line.strip() for line in result.splitlines() if line.strip()]
+        snag_category = lines[0] if lines else "N/A"
+        snag_description = lines[1] if len(lines) > 1 else "N/A"
+        desnag_steps = "\n".join(lines[2:]) if len(lines) > 2 else "N/A"
+        return snag_category, snag_description, desnag_steps
+    except Exception as e:
+        # UI boundary: report any failure (network, HTTP status, unexpected
+        # response shape, image encoding) in the first textbox.
+        return f"Error: {str(e)}", "", ""
 # Create the Gradio interface
+# One image input feeds analyze_construction_image; the three textboxes
+# receive, in order, the three strings that function returns.
 iface = gr.Interface(
     fn=analyze_construction_image,
+    inputs=gr.Image(type="pil", label="Upload Construction Image"),
     outputs=[
         gr.Textbox(label="Snag Category"),
         gr.Textbox(label="Snag Description"),
         gr.Textbox(label="Steps to Desnag")
     ],
+    title="Construction Image Analyzer (Llama 3.2-Vision via Groq)",
+    description="Upload a construction site image to identify issues and get desnag steps using Llama 3.2-Vision technology through Groq API.",
+    # NOTE(review): these are relative file names; presumably the images must
+    # exist next to app.py - confirm they are committed to the repo.
+    examples=[
+        ["example_image1.jpg"],
+        ["example_image2.jpg"]
+    ],
+    # NOTE(review): with caching enabled, Gradio presumably runs fn on every
+    # example ahead of time, i.e. one Groq API call per example - confirm
+    # this is intended.
+    cache_examples=True,
+    theme="default"
 )
 # Launch the app
+# Start the server only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    iface.launch()