import os
import base64
import io
import json
import logging
import traceback

import cv2
import gradio as gr
import numpy as np
from groq import Groq
from PIL import Image

# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Load environment variables
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    logger.error("GROQ_API_KEY is not set in environment variables")
    raise ValueError("GROQ_API_KEY is not set")

# Initialize Groq client
client = Groq(api_key=GROQ_API_KEY)


def encode_image(image):
    """Encode an image (file path, PIL Image, or numpy array) as base64 PNG."""
    try:
        if isinstance(image, str):
            # If image is a file path
            with open(image, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        elif isinstance(image, Image.Image):
            # If image is a PIL Image
            buffered = io.BytesIO()
            image.save(buffered, format="PNG")
            return base64.b64encode(buffered.getvalue()).decode("utf-8")
        elif isinstance(image, np.ndarray):
            # If image is a numpy array (e.g. a video frame)
            is_success, buffer = cv2.imencode(".png", image)
            if not is_success:
                raise ValueError("Failed to encode numpy array as PNG")
            return base64.b64encode(buffer).decode("utf-8")
        else:
            raise ValueError(f"Unsupported image type: {type(image)}")
    except Exception as e:
        logger.error(f"Error encoding image: {str(e)}")
        raise


def analyze_construction_image(images, video=None):
    if not images and video is None:
        logger.warning("No images or video provided")
        return [("No input", "Error: Please upload images or a video for analysis.")]

    try:
        logger.info("Starting analysis")
        results = []

        if images:
            for i, image in enumerate(images):
                image_data_url = f"data:image/png;base64,{encode_image(image)}"
                messages = [
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": (
                                    f"Analyze this construction site image (Image {i+1}/{len(images)}). "
                                    "Identify any safety issues or hazards, categorize them, provide a "
                                    "detailed description, and suggest steps to resolve them."
                                ),
                            },
                            {"type": "image_url", "image_url": {"url": image_data_url}},
                        ],
                    }
                ]

                completion = client.chat.completions.create(
                    model="llama-3.2-90b-vision-preview",
                    messages=messages,
                    temperature=0.7,
                    max_tokens=1000,
                    top_p=1,
                    stream=False,
                    stop=None,
                )

                result = completion.choices[0].message.content
                results.append((f"Image {i+1} analysis", result))

        if video:
            # Gradio may pass either a file path or a file-like object
            video_path = video if isinstance(video, str) else video.name
            cap = cv2.VideoCapture(video_path)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps:
                fps = 30  # fall back to a default if FPS metadata is missing
            duration = frame_count / fps

            # Analyze frames at 0%, 25%, 50%, 75%, and 100% of the video duration
            for i, time_point in enumerate([0, 0.25, 0.5, 0.75, 1]):
                cap.set(cv2.CAP_PROP_POS_MSEC, time_point * duration * 1000)
                ret, frame = cap.read()
                if ret:
                    image_data_url = f"data:image/png;base64,{encode_image(frame)}"
                    messages = [
                        {
                            "role": "user",
                            "content": [
                                {
                                    "type": "text",
                                    "text": (
                                        "Analyze this frame from a construction site video "
                                        f"(Frame {i+1}/5 at {time_point*100}% of video duration). "
                                        "Identify any safety issues or hazards, categorize them, provide a "
                                        "detailed description, and suggest steps to resolve them."
                                    ),
                                },
                                {"type": "image_url", "image_url": {"url": image_data_url}},
                            ],
                        }
                    ]

                    completion = client.chat.completions.create(
                        model="llama-3.2-90b-vision-preview",
                        messages=messages,
                        temperature=0.7,
                        max_tokens=1000,
                        top_p=1,
                        stream=False,
                        stop=None,
                    )

                    result = completion.choices[0].message.content
                    results.append((f"Video frame {i+1} analysis", result))

            cap.release()

        logger.info("Analysis completed successfully")
        return results
    except Exception as e:
        logger.error(f"Error during analysis: {str(e)}")
        logger.error(traceback.format_exc())
        error_message = (
            f"Error during analysis: {str(e)}. "
            "Please try again or contact support if the issue persists."
        )
        return [("Analysis error", error_message)]


def chat_about_image(message, chat_history):
    try:
        # Prepare the conversation history for the API
        messages = [
            {
                "role": "system",
                "content": (
                    "You are an AI assistant specialized in analyzing construction site images "
                    "and answering questions about them. Use the information from the initial "
                    "analysis to answer user queries."
                ),
            },
        ]

        # Add chat history to messages
        for human, ai in chat_history:
            if human:
                messages.append({"role": "user", "content": human})
            if ai:
                messages.append({"role": "assistant", "content": ai})

        # Add the new user message
        messages.append({"role": "user", "content": message})

        # Make API call
        completion = client.chat.completions.create(
            model="llama-3.2-90b-vision-preview",
            messages=messages,
            temperature=0.7,
            max_tokens=500,
            top_p=1,
            stream=False,
            stop=None,
        )

        response = completion.choices[0].message.content
        chat_history.append((message, response))

        return "", chat_history
    except Exception as e:
        logger.error(f"Error during chat: {str(e)}")
        return "", chat_history + [(message, f"Error: {str(e)}")]


# Custom CSS for improved styling
custom_css = """
.container { max-width: 1200px; margin: auto; padding-top: 1.5rem; }
.header { text-align: center; margin-bottom: 1rem; }
.header h1 { color: #2c3e50; font-size: 2.5rem; }
.subheader { color: #34495e; font-size: 1rem; line-height: 1.2; margin-bottom: 1.5rem; text-align: center; padding: 0 15px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
.image-container { border: 2px dashed #3498db; border-radius: 10px; padding: 1rem; text-align: center; }
.analyze-button { background-color: #2ecc71 !important; color: white !important; }
.clear-button { background-color: #e74c3c !important; color: white !important; width: 100px !important; }
.chatbot { border: 1px solid #bdc3c7; border-radius: 10px; padding: 1rem; height: 400px; overflow-y: auto; }
.chat-input { border: 1px solid #bdc3c7; border-radius: 5px; padding: 0.5rem; }
.groq-badge { position: fixed; bottom: 10px; right: 10px; background-color: #f39c12; color: white; padding: 5px 10px; border-radius: 5px; font-weight: bold; }
.chat-container { display: flex; flex-direction: column; }
.input-row { display: flex; align-items: center; margin-top: 10px; }
.input-row > div:first-child { flex-grow: 1; margin-right: 10px; }
"""

# Create the Gradio interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
    gr.HTML(
        """
        <p class="subheader">Enhance workplace safety and compliance with AI-powered image and video analysis using Llama 3.2 90B Vision and expert chat assistance.</p>