import torch
import streamlit as st
import transformers
from transformers import AutoTokenizer, LlamaForCausalLM

MODEL_NAME = 'klyang/MentaLLaMA-chat-13B'

# Load the model in bfloat16 and let accelerate place it across available
# devices. The tokenizer must be loaded from the model name (or a local
# path), not from the model object itself.
model = LlamaForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Build a text-generation pipeline around the already-loaded model; dtype
# and device placement were set at load time above.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Sample one completion for the test prompt.
sequences = pipeline(
    "Girafatron is obsessed with giraffes, the most glorious animal on the "
    "face of this Earth. Girafatron believes all other animals are "
    "irrelevant when compared to the glorious majesty of the giraffe.\n"
    "Daniel: Hello, Girafatron!\nGirafatron:",
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

st.title("Testing the MentaLLaMA Model")
for seq in sequences:
    st.write(f"Result: {seq['generated_text']}")

# Alternative: call model.generate directly instead of using a pipeline.
# prompt = "Hey, are you conscious? Can you talk to me?"
# inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# generate_ids = model.generate(inputs.input_ids, max_length=30)
# st.write(tokenizer.batch_decode(generate_ids, skip_special_tokens=True,
#                                 clean_up_tokenization_spaces=False)[0])
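
# Usage sketch (assumption: the script above is saved as app.py; the
# filename is illustrative). Streamlit serves the script as a local web
# page and re-runs it top-to-bottom on each interaction:
#
#   streamlit run app.py
#
# Rough memory estimate: 13B parameters at 2 bytes each (bfloat16) is
# about 26 GB of weights, which device_map='auto' shards across the
# available GPUs and CPU. Because Streamlit re-runs the whole script,
# wrapping the model/tokenizer loading in a function decorated with
# @st.cache_resource would avoid reloading the weights on every rerun.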