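# Streamlit app serving the Mykes/med_gemma7b_gguf model through llama-cpp-python.
# Assumed setup (not stated in the source): pip install streamlit llama-cpp-python,
# then launch with `streamlit run app.py`.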
import streamlit as st
from llama_cpp import Llama

# Download the GGUF weights from the Hugging Face Hub and load them with llama.cpp
llm = Llama.from_pretrained(
    repo_id="Mykes/med_gemma7b_gguf",
    filename="*Q4_K_M.gguf",
    verbose=False
)

input_text = st.text_area('text')
if input_text:
    output = llm(
        input_text,          # Prompt
        max_tokens=32,       # Generate up to 32 tokens; set to None to generate up to the end of the context window
        stop=["Q:", "\n"],   # Stop generating just before the model would generate a new question
        echo=True            # Echo the prompt back in the output
    )  # Generate a completion; can also call create_completion
    st.write(output["choices"][0]["text"])  # The completion dict carries the generated text under "choices"
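
# Optional streaming variant (a sketch, not part of the original app): llama-cpp-python
# also accepts stream=True, which yields completion chunks that Streamlit can render
# incrementally into a placeholder.
# if input_text:
#     placeholder = st.empty()
#     streamed = ""
#     for chunk in llm(input_text, max_tokens=32, stop=["Q:", "\n"], stream=True):
#         streamed += chunk["choices"][0]["text"]
#         placeholder.write(streamed)  # Overwrite the placeholder with the text so far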
# Alternative: load the GGUF file with ctransformers instead of llama-cpp-python.
# hf=True returns a transformers-compatible model so generate()/decode() work as below.
# from ctransformers import AutoModelForCausalLM, AutoTokenizer
# model = AutoModelForCausalLM.from_pretrained("Mykes/med_gemma7b_gguf", model_file="unsloth.Q4_K_M.gguf", hf=True)
# tokenizer = AutoTokenizer.from_pretrained(model)
# input_text = st.text_area('text')
# if input_text:
#     input_ids = tokenizer(input_text, return_tensors="pt")
#     outputs = model.generate(**input_ids)
#     st.write(tokenizer.decode(outputs[0], skip_special_tokens=True))  # Decode token ids back to text
# Alternative: let transformers load the GGUF file directly via gguf_file support
# from transformers import AutoTokenizer, AutoModelForCausalLM
# model_id = "Mykes/med_gemma7b_gguf"
# filename = "unsloth.Q4_K_M.gguf"
# tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
# model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
# input_text = st.text_area('text')
# if input_text:
#     input_ids = tokenizer(input_text, return_tensors="pt")
#     outputs = model.generate(**input_ids)
#     st.write(tokenizer.decode(outputs[0], skip_special_tokens=True))  # Decode token ids back to text