Spaces:
Runtime error
Runtime error
File size: 3,304 Bytes
e959087 e3c5eb3 d9d6c5f c1bdfbf 6698b86 d9d6c5f 500f2bb d9d6c5f 69618cb bb665f7 69618cb bb665f7 dc08000 bb665f7 936709d 500f2bb d9d6c5f 500f2bb dc08000 500f2bb 69618cb bb665f7 dc08000 bb665f7 69618cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, pipeline, set_seed
# Run on the first CUDA GPU when torch reports one, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Hub repository that supplies both the fine-tuned weights and the tokenizer.
repo_id = "j2moreno/TinyLlama-1.1B-Chat-v1.0-leo-finetuned"

model = AutoModelForCausalLM.from_pretrained(repo_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

# Pass the device explicitly: the model has already been moved to `device`,
# and the pipeline must place its input tensors on that same device. Relying
# on the pipeline's default can leave inputs on CPU while the model sits on
# the GPU, depending on the installed transformers version.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

# Seed handed to set_seed() before every generation call.
SEED = 42
# Heading passed as `title=` to gr.ChatInterface.
title = "Who is Leonardo Moreno?"

# Markdown passed as `description=` to gr.ChatInterface. It introduces the
# fine-tuned TinyLlama model served by this app and links to its model card.
description = """
This Space demonstrates a finetuned [TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) that was trained on information about Leonardo Moreno. **Ask it anything about Leonardo Moreno**
Model: [j2moreno/TinyLlama-1.1B-Chat-v1.0-leo-finetuned](https://huggingface.co/j2moreno/TinyLlama-1.1B-Chat-v1.0-leo-finetuned)
Leonardo Moreno contacts:
- [LinkedIn](https://www.linkedin.com/in/jose-leonardo-moreno-/)
- [Github](https://github.com/j2moreno)
**Warning:** This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate a reply.
"""

# Sample prompts passed as `examples=` to gr.ChatInterface; each entry is a
# one-element list holding the user message.
examples = [
    ["Who is Leonardo Moreno?"],
    ["Describe Leonardo Moreno's professional background."],
    ["What projects has Leonardo Moreno worked on?"],
    ["What are Leonardo Moreno's core technical skills?"],
    ["How has Leonardo Moreno integrated AI in his work?"],
]
# @spaces.GPU  (left disabled, as in the original; `spaces` is not imported here)
def generate_response(message, history, temperature=0.4, top_p=0.95, top_k=50, max_new_tokens=256):
    """Generate one chat reply to ``message`` with the module-level ``pipe``.

    Args:
        message: The user's latest message.
        history: Earlier conversation turns supplied by the chat UI. It is
            accepted for interface compatibility but not used: every reply is
            generated from the system prompt and ``message`` alone.
        temperature: Sampling temperature forwarded to the pipeline.
        top_p: Nucleus-sampling threshold forwarded to the pipeline.
        top_k: Top-k sampling cutoff forwarded to the pipeline.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated reply text with leading whitespace removed.
    """
    # Re-seed before every call so sampling always starts from the same
    # RNG state.
    set_seed(SEED)

    chat_messages = [
        {
            "role": "system",
            "content": "You are a highly knowledgeable and friendly chatbot equipped with extensive information across various domains. Your goal is to understand and respond to user inquiries with accuracy and clarity. You're adept at providing detailed explanations, concise summaries, and insightful responses. Your interactions are always respectful, helpful, and focused on delivering the most relevant information to the user.",
        },
        {"role": "user", "content": message},
    ]

    # tokenize=False yields the formatted prompt as a string (the pipeline
    # tokenizes it itself); add_generation_prompt=True appends the marker that
    # opens the assistant's turn.
    prompt = pipe.tokenizer.apply_chat_template(
        chat_messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    outputs = pipe(
        prompt,
        # int() guards against UI widgets that deliver whole numbers as floats.
        max_new_tokens=int(max_new_tokens),
        do_sample=True,
        temperature=temperature,
        top_k=int(top_k),
        top_p=top_p,
        repetition_penalty=1.10,
        # Return only the newly generated text. This avoids splitting on a
        # hard-coded "<|assistant|>" marker, which would leak the whole prompt
        # if the chat template used a different marker and would truncate any
        # reply that happened to contain it.
        return_full_text=False,
    )
    return outputs[0]["generated_text"].lstrip()
if __name__ == "__main__":
    # Build the chat UI around generate_response and start the Gradio server.
    demo = gr.ChatInterface(
        generate_response,
        title=title,
        description=description,
        examples=examples,
        # Ask Gradio to cache the replies for the example prompts.
        cache_examples=True,
    )
    demo.launch()