rumed-phi3-mini / app.py
Mykes's picture
Update app.py
f6b26e3 verified
import streamlit as st
from llama_cpp import Llama
# Page-level configuration; Streamlit requires this to be the first st.* call.
st.set_page_config(page_title="russian medical phi3-mini", page_icon="🤖", layout="wide")

# Custom CSS for better styling.
# The emoji below were previously stored as mis-decoded UTF-8 ("mojibake");
# they are restored to the intended robot / user glyphs.
st.markdown("""
<style>
.stTextInput > div > div > input {
background-color: #f0f2f6;
}
.chat-message {
padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
background-color: #2b313e
}
.chat-message.bot {
background-color: #475063
}
.chat-message .avatar {
width: 20%;
}
.chat-message .avatar img {
max-width: 78px;
max-height: 78px;
border-radius: 50%;
object-fit: cover;
}
.chat-message .message {
width: 80%;
padding: 0 1.5rem;
color: #fff;
}
.chat-message.user::before {
content: "👤";
font-size: 1.5em;
margin-right: 0.5em;
}
.chat-message.bot::before {
content: "🤖";
font-size: 1.5em;
margin-right: 0.5em;
}
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def load_model():
    """Load the quantized medical phi3-mini GGUF model.

    Decorated with ``st.cache_resource`` so the loaded model object is
    reused rather than being rebuilt on every script rerun.

    Returns:
        The ``Llama`` instance produced by ``Llama.from_pretrained``.
    """
    model_options = {
        # Alternative, smaller model: "Mykes/med_tinyllama_gguf"
        "repo_id": "Mykes/med_phi3-mini-4k-GGUF",
        # Glob that selects the Q4_K_M quantization file in the repo.
        "filename": "*Q4_K_M.gguf",
        "verbose": False,
        # Context window (tokens), prompt batch size and CPU threads.
        "n_ctx": 256,
        "n_batch": 128,
        "n_threads": 8,
        # Memory options passed through to llama.cpp.
        "use_mlock": True,
        "use_mmap": True,
    }
    return Llama.from_pretrained(**model_options)


# Module-level model handle used by the chat loop below.
llm = load_model()
def format_context(messages):
    """Render chat messages as a plain-text transcript for the model prompt.

    Args:
        messages: Iterable of dicts, each with a ``"role"`` and a
            ``"content"`` key.

    Returns:
        A string with one newline-terminated line per message. Messages
        whose role is ``"user"`` are prefixed with ``"Human: "``; every
        other role is prefixed with ``"Assistant: "``. An empty iterable
        yields ``""``.
    """
    lines = []
    for message in messages:
        speaker = "Human" if message["role"] == "user" else "Assistant"
        lines.append(f"{speaker}: {message['content']}\n")
    # Join once instead of growing a string with += inside the loop.
    return "".join(lines)
# Sidebar: static description of the app and the model behind it.
st.sidebar.title("Chat with AI")
st.sidebar.markdown("This is a simple chat interface using Streamlit and an AI model.")

# Disabled "How to use" section, kept for reference:
# st.sidebar.header("How to use")
# st.sidebar.markdown("""
# 1. Type your question in the chat input box at the bottom of the screen.
# 2. Press Enter or click the Send button to submit your question.
# 3. The AI will generate a response based on your input.
# 4. You can have a continuous conversation by asking follow-up questions.
# """)

st.sidebar.header("Model Information")
# The context length shown here must match the n_ctx value passed in
# load_model() (256); it previously advertised 512 tokens.
st.sidebar.markdown("""
- Model: med_phi3-mini-4k-GGUF
([View on Hugging Face](https://huggingface.co/Mykes/med_phi3-mini-4k-GGUF))
- Context Length: 256 tokens
- This model is specialized in medical knowledge.
- Russian language
""")

st.sidebar.header("Tips")
st.sidebar.markdown("""
- Be clear and specific in your questions.
- For medical queries, provide relevant details.
- Remember that this is an AI model and may not always be 100% accurate.
""")
# Main chat interface
st.title("russian medical phi3-mini")

# Start with an empty history the first time this session runs the script.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Streamlit re-executes the script on every interaction, so the stored
# conversation has to be drawn again each time.
for past_message in st.session_state.messages:
    role = past_message["role"]
    content = past_message["content"]
    with st.chat_message(role):
        st.markdown(content)
# React to user input
if prompt := st.chat_input("What is your question?"):
    # Show the user's message right away and record it in the history.
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Build the model input from the last 3 messages. That slice already ends
    # with the message appended just above, so format_context() emits the
    # trailing "Human: <prompt>" line itself; adding the prompt a second time
    # here would make the model see the question twice.
    context = format_context(st.session_state.messages[-3:])
    model_input = f"{context}Assistant:"

    # Stream the assistant response into a single placeholder.
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        for chunk in llm(
            model_input,
            max_tokens=None,
            stop=["Human:", "<end_of_turn>"],
            # Only generated text is wanted. With the prompt not echoed there
            # is no need to split on "Assistant:", which could cut off a reply
            # that happens to contain that text.
            echo=False,
            stream=True,
        ):
            full_response += chunk["choices"][0]["text"]
            # Trailing block character acts as a typing cursor while streaming.
            message_placeholder.markdown(full_response + "▌")
        assistant_response = full_response.strip()
        # Final render without the cursor.
        message_placeholder.markdown(assistant_response)

    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})
# Add a button to clear the chat history
if st.sidebar.button("Clear Chat History"):
    st.session_state.messages = []
    # st.experimental_rerun() is deprecated in favour of st.rerun(). Prefer
    # the new name and fall back so older Streamlit versions keep working.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()

# Display the number of messages in the current conversation
st.sidebar.markdown(f"Current conversation length: {len(st.session_state.messages)} messages")

# Add a footer (heart emoji restored from mis-decoded UTF-8)
st.sidebar.markdown("---")
st.sidebar.markdown("Created with ❤️ using Streamlit and Llama.cpp")