capradeepgujaran committed on
Commit
10b5661
1 Parent(s): 2768978

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -33
app.py CHANGED
@@ -1,50 +1,95 @@
 
 
 
1
  import gradio as gr
2
  from PIL import Image
3
- import torch
4
- from transformers import AutoProcessor, LlamaForCausalLM, LlamaTokenizer
5
 
6
- # Load the Llama 2 model and processor
7
- # Note: You'll need to replace these with the actual Llama 3.2 vision model when it becomes available
8
- model_name = "meta-llama/Llama-2-7b-chat-hf"
9
- processor = AutoProcessor.from_pretrained(model_name)
10
- model = LlamaForCausalLM.from_pretrained(model_name)
11
- tokenizer = LlamaTokenizer.from_pretrained(model_name)
 
12
 
13
  def analyze_construction_image(image):
14
- # Process the image
15
- inputs = processor(images=image, return_tensors="pt")
16
-
17
- # Generate text based on the image
18
- prompt = "Analyze this construction image and identify the snag category, snag description, and steps to desnag."
19
- input_ids = tokenizer(prompt, return_tensors="pt").input_ids
20
-
21
- # Concatenate the image embeddings with the text input
22
- combined_inputs = torch.cat([inputs.pixel_values, input_ids], dim=1)
23
-
24
- # Generate output
25
- outputs = model.generate(combined_inputs, max_length=300)
26
- result = tokenizer.decode(outputs[0], skip_special_tokens=True)
27
-
28
- # Parse the result (this is a simplified example)
29
- lines = result.split('\n')
30
- snag_category = lines[0] if len(lines) > 0 else "N/A"
31
- snag_description = lines[1] if len(lines) > 1 else "N/A"
32
- desnag_steps = lines[2:] if len(lines) > 2 else ["N/A"]
33
-
34
- return snag_category, snag_description, "\n".join(desnag_steps)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  # Create the Gradio interface
37
  iface = gr.Interface(
38
  fn=analyze_construction_image,
39
- inputs=gr.Image(type="pil"),
40
  outputs=[
41
  gr.Textbox(label="Snag Category"),
42
  gr.Textbox(label="Snag Description"),
43
  gr.Textbox(label="Steps to Desnag")
44
  ],
45
- title="Construction Image Analyzer",
46
- description="Upload a construction site image to identify issues and get desnag steps."
 
 
 
 
 
 
47
  )
48
 
49
  # Launch the app
50
- iface.launch()
 
 
1
+ import os
2
+ import base64
3
+ import requests
4
  import gradio as gr
5
  from PIL import Image
6
+ import io
 
7
 
# Load environment variables
# The key is read once at import time; it is None when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# Groq exposes its OpenAI-compatible chat endpoint under the /openai/v1 prefix;
# the path without "/openai" does not route to the chat-completions API.
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
11
+
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
15
 
def _split_analysis(text):
    """Split the model's reply into (category, description, steps).

    Blank lines are discarded before splitting so that an empty line between
    sections does not end up as the description. Missing parts become "N/A".
    """
    lines = [line for line in (text or "").splitlines() if line.strip()]
    category = lines[0].strip() if lines else "N/A"
    description = lines[1].strip() if len(lines) > 1 else "N/A"
    steps = "\n".join(lines[2:]) if len(lines) > 2 else "N/A"
    return category, description, steps


def analyze_construction_image(image):
    """Send an uploaded image to the Groq chat API and return the snag analysis.

    Args:
        image: The uploaded image object, or None when nothing was uploaded.
            It must provide ``save(file_obj, format="PNG")``.

    Returns:
        A 3-tuple of strings ``(snag_category, snag_description, desnag_steps)``.
        On any failure the first element is an ``"Error: ..."`` message and the
        other two are empty strings; the function does not raise.
    """
    if image is None:
        return "Error: No image uploaded", "", ""

    # Without a key the request would be sent as "Bearer None" and fail with
    # an opaque HTTP error, so report the real cause instead.
    if not GROQ_API_KEY:
        return "Error: GROQ_API_KEY environment variable is not set", "", ""

    try:
        # Convert the image to a base64-encoded PNG for a data URL.
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("ascii")

        # NOTE(review): Groq's Llama 3.2 vision preview models reportedly
        # rejected requests that combine a system message with an image, so
        # the role instructions are carried in the user turn - confirm.
        # The last sentence asks for the layout that _split_analysis expects.
        instructions = (
            "You are an AI assistant specialized in analyzing construction site images. "
            "Identify issues, categorize them, and provide steps to resolve them. "
            "Analyze this construction image. Identify the snag category, provide a "
            "detailed snag description, and list steps to desnag. "
            "Reply with the snag category on the first line, the snag description "
            "on the second line, and then one desnag step per line."
        )
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": instructions},
                    {
                        "type": "image_url",
                        # The OpenAI-compatible schema expects an object with
                        # a "url" field here, not a bare string.
                        "image_url": {"url": f"data:image/png;base64,{img_str}"},
                    },
                ],
            }
        ]

        headers = {
            "Authorization": f"Bearer {GROQ_API_KEY}",
            "Content-Type": "application/json",
        }
        data = {
            # NOTE(review): confirm this id against Groq's current model list;
            # set GROQ_VISION_MODEL to switch models without a code change.
            "model": os.environ.get("GROQ_VISION_MODEL", "llama-3.2-90b-vision-preview"),
            "messages": messages,
            "max_tokens": 300,
            "temperature": 0.7,
        }

        # A timeout keeps a stalled connection from hanging the UI request.
        response = requests.post(GROQ_API_URL, headers=headers, json=data, timeout=60)
        response.raise_for_status()

        result = response.json()["choices"][0]["message"]["content"]
        return _split_analysis(result)
    except Exception as e:
        # This is the UI boundary: every failure is shown in the first output
        # box rather than propagated.
        return f"Error: {str(e)}", "", ""
73
 
# Create the Gradio interface
# The components are built first (input, then the three outputs) and handed
# to gr.Interface by name.
_image_input = gr.Image(type="pil", label="Upload Construction Image")
_result_boxes = [
    gr.Textbox(label="Snag Category"),
    gr.Textbox(label="Snag Description"),
    gr.Textbox(label="Steps to Desnag"),
]
_example_rows = [
    ["example_image1.jpg"],
    ["example_image2.jpg"],
]

iface = gr.Interface(
    fn=analyze_construction_image,
    inputs=_image_input,
    outputs=_result_boxes,
    title="Construction Image Analyzer (Llama 3.2-Vision via Groq)",
    description="Upload a construction site image to identify issues and get desnag steps using Llama 3.2-Vision technology through Groq API.",
    examples=_example_rows,
    cache_examples=True,
    theme="default",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()