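# Streamlit chat UI for a Russian medical assistant, backed by the
# Mykes/med_phi3-mini-4k-GGUF model and served locally via llama-cpp-python.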
import streamlit as st
from llama_cpp import Llama

st.set_page_config(page_title="russian medical phi3-mini", page_icon="🤖", layout="wide")
# Custom CSS for better styling
st.markdown("""
<style>
    .stTextInput > div > div > input {
        background-color: #f0f2f6;
    }
    .chat-message {
        padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
    }
    .chat-message.user {
        background-color: #2b313e
    }
    .chat-message.bot {
        background-color: #475063
    }
    .chat-message .avatar {
        width: 20%;
    }
    .chat-message .avatar img {
        max-width: 78px;
        max-height: 78px;
        border-radius: 50%;
        object-fit: cover;
    }
    .chat-message .message {
        width: 80%;
        padding: 0 1.5rem;
        color: #fff;
    }
    .chat-message.user::before {
        content: "👤";
        font-size: 1.5em;
        margin-right: 0.5em;
    }
    .chat-message.bot::before {
        content: "🤖";
        font-size: 1.5em;
        margin-right: 0.5em;
    }
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def load_model():
    return Llama.from_pretrained(
        repo_id="Mykes/med_phi3-mini-4k-GGUF",
        # repo_id="Mykes/med_tinyllama_gguf",  # alternative, smaller model
        filename="*Q4_K_M.gguf",  # pick the Q4_K_M quantization from the repo
        verbose=False,
        n_ctx=256,       # reduced context window to keep memory usage low
        n_batch=128,
        n_threads=8,     # adjust based on available CPU cores
        use_mlock=True,
        use_mmap=True,
    )

llm = load_model()
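
# Turn prior chat messages into a plain-text "Human: ... / Assistant: ..."
# transcript that is prepended to the model prompt as conversation context.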
def format_context(messages):
    context = ""
    for message in messages:
        if message["role"] == "user":
            context += f"Human: {message['content']}\n"
        else:
            context += f"Assistant: {message['content']}\n"
    return context

# Sidebar
st.sidebar.title("Chat with AI")
st.sidebar.markdown("This is a simple chat interface using Streamlit and an AI model.")
# # Add useful information to the sidebar
# st.sidebar.header("How to use")
# st.sidebar.markdown("""
# 1. Type your question in the chat input box at the bottom of the screen.
# 2. Press Enter or click the Send button to submit your question.
# 3. The AI will generate a response based on your input.
# 4. You can have a continuous conversation by asking follow-up questions.
# """)
st.sidebar.header("Model Information") | |
st.sidebar.markdown(""" | |
- Model: med_phi3-mini-4k-GGUF | |
([View on Hugging Face](https://huggingface.co/Mykes/med_phi3-mini-4k-GGUF)) | |
- Context Length: 512 tokens | |
- This model is specialized in medical knowledge. | |
- Russian language | |
""") | |
st.sidebar.header("Tips") | |
st.sidebar.markdown(""" | |
- Be clear and specific in your questions. | |
- For medical queries, provide relevant details. | |
- Remember that this is an AI model and may not always be 100% accurate. | |
""") | |

# Main chat interface
st.title("russian medical phi3-mini")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input
if prompt := st.chat_input("What is your question?"):
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Build context from the last few previous messages, excluding the message
    # just appended so the current prompt is not duplicated in the model input
    context = format_context(st.session_state.messages[:-1][-3:])

    # Prepare the model input
    model_input = f"{context}Human: {prompt}\nAssistant:"

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        # Stream tokens from the model; echo=True echoes the prompt back in the
        # output, so it is stripped again after the loop
        for token in llm(
            model_input,
            max_tokens=None,
            stop=["Human:", "<end_of_turn>"],
            echo=True,
            stream=True
        ):
            full_response += token['choices'][0]['text']
            message_placeholder.markdown(full_response + "▌")

        # Remove the initial context and prompt from the response
        assistant_response = full_response.split("Assistant:")[-1].strip()
        message_placeholder.markdown(assistant_response)

    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})

# Add a button to clear the chat history
if st.sidebar.button("Clear Chat History"):
    st.session_state.messages = []
    st.rerun()

# Display the number of messages in the current conversation
st.sidebar.markdown(f"Current conversation length: {len(st.session_state.messages)} messages")

# Add a footer
st.sidebar.markdown("---")
st.sidebar.markdown("Created with ❤️ using Streamlit and Llama.cpp")