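# Streamlit chat UI for a Russian medical assistant, backed by the
# Mykes/med_phi3-mini-4k-GGUF model and served locally via llama-cpp-python.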
import streamlit as st
from llama_cpp import Llama

st.set_page_config(page_title="russian medical phi3-mini", page_icon="🤖", layout="wide")
# Custom CSS for better styling
st.markdown("""
<style>
    .stTextInput > div > div > input {
        background-color: #f0f2f6;
    }
    .chat-message {
        padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
    }
    .chat-message.user {
        background-color: #2b313e
    }
    .chat-message.bot {
        background-color: #475063
    }
    .chat-message .avatar {
        width: 20%;
    }
    .chat-message .avatar img {
        max-width: 78px;
        max-height: 78px;
        border-radius: 50%;
        object-fit: cover;
    }
    .chat-message .message {
        width: 80%;
        padding: 0 1.5rem;
        color: #fff;
    }
    .chat-message.user::before {
        content: "👤";
        font-size: 1.5em;
        margin-right: 0.5em;
    }
    .chat-message.bot::before {
        content: "🤖";
        font-size: 1.5em;
        margin-right: 0.5em;
    }
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def load_model():
    return Llama.from_pretrained(
        repo_id="Mykes/med_phi3-mini-4k-GGUF",
        # repo_id="Mykes/med_tinyllama_gguf",  # alternative, smaller model
        filename="*Q4_K_M.gguf",  # pick the Q4_K_M quantization from the repo
        verbose=False,
        n_ctx=256,       # reduced context window to keep memory usage low
        n_batch=128,
        n_threads=8,     # adjust based on available CPU cores
        use_mlock=True,
        use_mmap=True,
    )

llm = load_model()
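
# Turn prior chat messages into a plain-text "Human: ... / Assistant: ..."
# transcript that is prepended to the model prompt as conversation context.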
def format_context(messages):
    context = ""
    for message in messages:
        if message["role"] == "user":
            context += f"Human: {message['content']}\n"
        else:
            context += f"Assistant: {message['content']}\n"
    return context

# Sidebar
st.sidebar.title("Chat with AI")
st.sidebar.markdown("This is a simple chat interface using Streamlit and an AI model.")
# # Add useful information to the sidebar
# st.sidebar.header("How to use")
# st.sidebar.markdown("""
# 1. Type your question in the chat input box at the bottom of the screen.
# 2. Press Enter or click the Send button to submit your question.
# 3. The AI will generate a response based on your input.
# 4. You can have a continuous conversation by asking follow-up questions.
# """)
st.sidebar.header("Model Information") | |
st.sidebar.markdown(""" | |
- Model: med_phi3-mini-4k-GGUF | |
([View on Hugging Face](https://huggingface.co/Mykes/med_phi3-mini-4k-GGUF)) | |
- Context Length: 512 tokens | |
- This model is specialized in medical knowledge. | |
- Russian language | |
""") | |
st.sidebar.header("Tips") | |
st.sidebar.markdown(""" | |
- Be clear and specific in your questions. | |
- For medical queries, provide relevant details. | |
- Remember that this is an AI model and may not always be 100% accurate. | |
""") | |

# Main chat interface
st.title("russian medical phi3-mini")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input
if prompt := st.chat_input("What is your question?"):
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Build context from the last few previous messages, excluding the message
    # just appended so the current prompt is not duplicated in the model input
    context = format_context(st.session_state.messages[:-1][-3:])

    # Prepare the model input
    model_input = f"{context}Human: {prompt}\nAssistant:"

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        # Stream tokens from the model; echo=True echoes the prompt back in the
        # output, so it is stripped again after the loop
        for token in llm(
            model_input,
            max_tokens=None,
            stop=["Human:", "<end_of_turn>"],
            echo=True,
            stream=True
        ):
            full_response += token['choices'][0]['text']
            message_placeholder.markdown(full_response + "▌")

        # Remove the initial context and prompt from the response
        assistant_response = full_response.split("Assistant:")[-1].strip()
        message_placeholder.markdown(assistant_response)

    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})

# Add a button to clear the chat history
if st.sidebar.button("Clear Chat History"):
    st.session_state.messages = []
    st.rerun()

# Display the number of messages in the current conversation
st.sidebar.markdown(f"Current conversation length: {len(st.session_state.messages)} messages")

# Add a footer
st.sidebar.markdown("---")
st.sidebar.markdown("Created with ❤️ using Streamlit and Llama.cpp")