srinidhidevaraj committed on
Commit
c8b83ad
1 Parent(s): d00eb9f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ # from dotenv import load_dotenv
3
+ import streamlit as st
4
+ import PIL.Image
5
+ import google.generativeai as genai
6
+ from langchain.prompts import ChatPromptTemplate
7
+ from langchain_community.llms import Ollama
8
+ from transformers import MllamaForConditionalGeneration, AutoProcessor
9
+ import torch
10
+ from accelerate import init_empty_weights
11
+ # Load environment variables
12
+
13
+ # Configure Gemini API
14
+ # genai.configure(api_key=os.getenv("gkey2"))
15
+
16
+ # Define the prompt template
17
+ # prompt = ChatPromptTemplate.from_messages(
18
+ # [
19
+ # ("system", "You are a helpful assistant. Please respond to the user's queries."),
20
+ # ("user", "Question: {question}")
21
+ # ]
22
+ # )
23
+
24
# Fixed base prompt sent to the model. It starts with the `<|image|>`
# placeholder followed by `<|begin_of_text|>` and a short instruction.
prompt = "<|image|><|begin_of_text|>You are a helpful assistant. Please respond to the user's queries."

# Hugging Face Hub identifier of the checkpoint to load.
model_id = "meta-llama/Llama-3.2-11B-Vision"


# show_spinner=False keeps the cached call from emitting any UI element
# before main() gets to call st.set_page_config().
@st.cache_resource(show_spinner=False)
def _load_model_and_processor(checkpoint):
    """Load the vision-language model and its processor exactly once.

    Streamlit re-executes this script on every user interaction; caching the
    loaded objects avoids re-reading the checkpoint on each rerun.

    Args:
        checkpoint: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(model, processor)`` tuple.
    """
    loaded_model = MllamaForConditionalGeneration.from_pretrained(
        checkpoint,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    loaded_processor = AutoProcessor.from_pretrained(checkpoint)
    return loaded_model, loaded_processor


# Initialize the Llama model (module-level names kept for the functions below).
model, processor = _load_model_and_processor(model_id)
36
+
37
+ # Define function to get response from the model
38
def get_gemin_response(input_text, img):
    """Run the vision model on an image together with the user's question.

    Args:
        input_text: Text typed by the user. When non-empty it is appended to
            the module-level ``prompt``; when empty the base prompt is used
            on its own.
        img: Image handed to the processor as its ``images`` argument.

    Returns:
        The raw result of ``model.generate`` (the caller decodes element 0
        with ``processor.decode``).
    """
    # Previously only the fixed base prompt was sent, so the user's question
    # never reached the model. Append it in the "Question: ..." form.
    if input_text:
        full_prompt = f"{prompt} Question: {input_text}"
    else:
        full_prompt = prompt

    inputs = processor(images=img, text=full_prompt, return_tensors="pt").to(model.device)
    # Generation length is capped at 30 new tokens.
    return model.generate(**inputs, max_new_tokens=30)
49
+
50
+ # Define the main function for the Streamlit app
51
def main():
    """Render the Streamlit page and handle one interaction.

    Shows a text box and an image uploader, previews the uploaded image, and
    on a button click sends both inputs to ``get_gemin_response`` and writes
    the decoded output. If either input is missing, an error message is
    displayed instead.
    """
    st.set_page_config(page_title='Gemini Image & Text')
    st.header('Gemini LLM Application')

    # Input text
    input_text = st.text_input("Input :", key='input')

    # Image uploader
    imgupload = st.file_uploader('Choose an image file', type=['jpg', 'jpeg', 'png'])

    # Bind img up front so the validation below works even when no file has
    # been uploaded (otherwise the name would be unbound on button click).
    img = None
    if imgupload is not None:
        img = PIL.Image.open(imgupload)
        st.image(img, caption='Uploaded Image', use_column_width=True)

    if st.button('Generate Response'):
        # Ensure both inputs are provided
        if img is not None and input_text:
            response = get_gemin_response(input_text, img)
            # generate() returns a batch; decode its first (only) sequence.
            st.write(processor.decode(response[0]))
        else:
            st.error("Please provide both input text and an image before generating a response.")
75
+
76
+
77
+ # Run the app
78
if __name__ == "__main__":
    # Start the UI only when this file is executed as a script.
    main()