datascientist22 committed on
Commit
eaf4e19
1 Parent(s): 253e08c

Update app.py

Files changed (1): app.py +14 -36
app.py CHANGED
@@ -1,13 +1,19 @@
 import streamlit as st
-import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
 
-# Load the tokenizer and model using PyTorch
+# Load the tokenizer and model for CPU (avoid bitsandbytes quantization)
 tokenizer = AutoTokenizer.from_pretrained("MohamedMotaz/Examination-llama-8b-4bit")
-model = AutoModelForCausalLM.from_pretrained("MohamedMotaz/Examination-llama-8b-4bit", torch_dtype=torch.float16).to("cuda" if torch.cuda.is_available() else "cpu")
+model = AutoModelForCausalLM.from_pretrained(
+    "MohamedMotaz/Examination-llama-8b-4bit",
+    torch_dtype=torch.float32  # Use float32 to avoid 8-bit quantization
+)
+
+# Ensure the model runs on CPU
+model = model.to("cpu")
 
 # App Title
-st.title("Exam Corrector: Automated Grading with LLama 8b Model (PyTorch)")
+st.title("Exam Corrector: Automated Grading with LLama 8b Model (CPU)")
 
 # Instructions
 st.markdown("""
@@ -20,46 +26,18 @@ st.markdown("""
 model_answer = st.text_area("Model Answer", "The process of photosynthesis involves converting light energy into chemical energy.")
 student_answer = st.text_area("Student Answer", "Photosynthesis is when plants turn light into energy.")
 
-# Display documentation in the app
-with st.expander("Click to View Documentation"):
-    st.markdown("""
-    ## Exam-Corrector: A Fine-tuned LLama 8b Model
-
-    Exam-corrector is a fine-tuned version of the LLama 8b model, specifically adapted to function as a written question corrector. This model grades student answers by comparing them against model answers using predefined instructions.
-
-    ### Model Description:
-    The model ensures consistent and fair grading for written answers. Full marks are given to student answers that convey the complete meaning of the model answer, even with different wording.
-
-    ### Grading Instructions:
-    - Model Answer is only used as a reference and does not receive marks.
-    - Full marks are awarded when student answers convey the full meaning of the model answer.
-    - Partial marks are deducted for incomplete or irrelevant information.
-
-    ### Input Format:
-    - **Model Answer**: {model_answer}
-    - **Student Answer**: {student_answer}
-
-    ### Output Format:
-    - **Grade**: {grade}
-    - **Explanation**: {explanation}
-
-    ### Training Details:
-    - Fine-tuned with LoRA (Low-Rank Adaptation).
-    - Percentage of trainable model parameters: 3.56%.
-    """)
-
 # Button to trigger grading
 if st.button("Grade Answer"):
     # Combine inputs into the required prompt format
     inputs = f"Model Answer: {model_answer}\n\nStudent Answer: {student_answer}\n\nResponse:"
-
+
     # Tokenize the inputs using PyTorch tensors
-    input_ids = tokenizer(inputs, return_tensors="pt").input_ids.to(model.device)
+    input_ids = tokenizer(inputs, return_tensors="pt").input_ids.to("cpu")
 
-    # Generate the response using the model (PyTorch)
+    # Generate the response using the model (PyTorch, CPU-based)
     with torch.no_grad():
         outputs = model.generate(input_ids, max_length=200)
-
+
     # Decode the output
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
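
For reference, the net effect of the commit is easier to read as one consolidated script: the checkpoint is now loaded in float32 on the CPU (the commit's way of avoiding the bitsandbytes 4-bit path), the prompt is assembled from the two text areas, and the reply is produced with `model.generate` and decoded. The sketch below is not part of the commit; it restates the post-commit code with plain strings standing in for the Streamlit widgets, a `MODEL_ID` constant introduced only for readability, and a `print` standing in for whatever the app does with `response` below the portion shown in this diff.

```python
# Minimal sketch of the post-commit load/generate path (illustrative, not from the commit).
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "MohamedMotaz/Examination-llama-8b-4bit"  # same checkpoint as in app.py

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# float32 on CPU, as the commit intends, instead of the earlier float16/CUDA load
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float32).to("cpu")

# Stand-ins for the two st.text_area defaults in the app
model_answer = "The process of photosynthesis involves converting light energy into chemical energy."
student_answer = "Photosynthesis is when plants turn light into energy."

# Same prompt format the app builds before generation
inputs = f"Model Answer: {model_answer}\n\nStudent Answer: {student_answer}\n\nResponse:"
input_ids = tokenizer(inputs, return_tensors="pt").input_ids.to("cpu")

with torch.no_grad():
    outputs = model.generate(input_ids, max_length=200)

response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)  # the Streamlit app displays this instead (that part is outside the diff shown)
```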