import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


# Cache the tokenizer and model so Streamlit does not reload them on every
# rerun (Streamlit re-executes the whole script on each interaction, and
# reloading an 8B-parameter model each time would make the app unusable).
@st.cache_resource
def load_model():
    # Load the tokenizer and model for CPU without bitsandbytes
    tokenizer = AutoTokenizer.from_pretrained("MohamedMotaz/Examination-llama-8b-4bit")

    # Load the model in full precision, explicitly avoiding 8-bit quantization
    model = AutoModelForCausalLM.from_pretrained(
        "MohamedMotaz/Examination-llama-8b-4bit",
        torch_dtype=torch.float32,  # full precision (float32)
        device_map="cpu",           # force the model to run on the CPU
    )
    return tokenizer, model


tokenizer, model = load_model()

# App title
st.title("Exam Corrector: Automated Grading with Llama 8B Model (CPU)")

# Instructions
st.markdown("""
### Instructions:
- Enter both the **Model Answer** and the **Student Answer**.
- Click the **Grade Answer** button to get the grade and explanation.
""")

# Input fields for the model answer and the student answer
model_answer = st.text_area("Model Answer", "The process of photosynthesis involves converting light energy into chemical energy.")
student_answer = st.text_area("Student Answer", "Photosynthesis is when plants turn light into energy.")

# Button to trigger grading
if st.button("Grade Answer"):
    # Combine the inputs into the prompt format the model expects
    prompt = f"Model Answer: {model_answer}\n\nStudent Answer: {student_answer}\n\nResponse:"

    # Tokenize the prompt into PyTorch tensors
    encoded = tokenizer(prompt, return_tensors="pt")

    # Generate the response on the CPU; max_new_tokens bounds the length of
    # the generated text itself, independent of the prompt length
    with torch.no_grad():
        outputs = model.generate(
            encoded.input_ids,
            attention_mask=encoded.attention_mask,
            max_new_tokens=200,
        )

    # Decode only the newly generated tokens, skipping the echoed prompt
    new_tokens = outputs[0][encoded.input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Display the grade and explanation
    st.subheader("Grading Results")
    st.write(response)

# Footer and app creator details
st.markdown("""
---
**App created by [Engr. Hamesh Raj](https://www.linkedin.com/in/hamesh-raj)**
""")
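# Usage note (assumes the script is saved as app.py; any filename works):
# launch the app with the Streamlit CLI, which serves it on a local web port:
#
#   streamlit run app.py
#
# Keep in mind that float32 weights for an 8B-parameter model occupy roughly
# 32 GB (8B params x 4 bytes each), so the CPU host needs RAM on that order,
# and the first run will download the full checkpoint from the Hugging Face Hub.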