import streamlit as st
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="Mykes/med_gemma7b_gguf",
    filename="*Q4_K_M.gguf",
    verbose=False,
)

input_text = st.text_area('text')
if input_text:
    output = llm(
        input_text,         # Prompt
        max_tokens=32,      # Generate up to 32 tokens; set to None to generate up to the end of the context window
        stop=["Q:", "\n"],  # Stop generating just before the model would generate a new question
        echo=True,          # Echo the prompt back in the output
    )  # Generate a completion; can also call create_completion
    st.write(output["choices"][0]["text"])  # Display the generated text

# Alternative: ctransformers (hf=True exposes a transformers-compatible model,
# which is what makes the tokenizer/generate calls below work)
# from ctransformers import AutoModelForCausalLM, AutoTokenizer
# model = AutoModelForCausalLM.from_pretrained(
#     "Mykes/med_gemma7b_gguf", model_file="unsloth.Q4_K_M.gguf", hf=True
# )
# tokenizer = AutoTokenizer.from_pretrained(model)
# input_text = st.text_area('text')
# if input_text:
#     input_ids = tokenizer(input_text, return_tensors="pt")
#     outputs = model.generate(**input_ids)
#     st.write(tokenizer.decode(outputs[0], skip_special_tokens=True))

# Alternative: transformers loading the GGUF checkpoint directly
# from transformers import AutoTokenizer, AutoModelForCausalLM
# model_id = "Mykes/med_gemma7b_gguf"
# filename = "unsloth.Q4_K_M.gguf"
# tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
# model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
# input_text = st.text_area('text')
# if input_text:
#     input_ids = tokenizer(input_text, return_tensors="pt")
#     outputs = model.generate(**input_ids)
#     st.write(tokenizer.decode(outputs[0], skip_special_tokens=True))
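# A minimal sketch (not part of the original app) of caching the model load
# with Streamlit's st.cache_resource, so the GGUF weights are loaded once per
# process instead of on every script rerun; `load_llm` is an illustrative
# name, not from the original code.
#
# @st.cache_resource
# def load_llm():
#     return Llama.from_pretrained(
#         repo_id="Mykes/med_gemma7b_gguf",
#         filename="*Q4_K_M.gguf",
#         verbose=False,
#     )
# llm = load_llm()

# A minimal sketch (also not part of the original app) of streaming the
# completion token by token instead of waiting for the full response,
# assuming the same `llm` object above. llama-cpp-python's stream=True mode
# yields chunks whose text lives in chunk["choices"][0]["text"];
# `placeholder` is an illustrative name.
#
# if input_text:
#     placeholder = st.empty()
#     generated = ""
#     for chunk in llm(input_text, max_tokens=32, stop=["Q:", "\n"], stream=True):
#         generated += chunk["choices"][0]["text"]
#         placeholder.write(generated)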