import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )
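# load_in_4bit quantizes the base weights to 4 bits, "nf4" is the NormalFloat4
# data type from the QLoRA paper, and bnb_4bit_compute_dtype sets the dtype used
# for the de-quantized matmuls. This is left disabled, presumably because
# bitsandbytes 4-bit loading requires a CUDA GPU and this Space runs on CPU.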
# Load the base model in bfloat16; quantization is left off for CPU inference.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    torch_dtype=torch.bfloat16,
    # quantization_config=bnb_config,
    trust_remote_code=True,
)
# Attach the QLoRA fine-tuned PEFT adapter on top of the base model.
model.load_adapter('./finetunedPEFTModel')
# Use the tokenizer saved alongside the adapter; loading the base model's
# tokenizer (commented below) should be equivalent if none was saved there.
tokenizer = AutoTokenizer.from_pretrained('./finetunedPEFTModel', trust_remote_code=True)
# tokenizer.pad_token = tokenizer.unk_token
# tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct", trust_remote_code=True)
def generateText(inputText="What is QLoRA fine-tuning?", num_tokens=200):
    # max_new_tokens counts only generated tokens, matching the slider label below
    # (max_length would also count the prompt tokens).
    pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_new_tokens=num_tokens)
    result = pipe(inputText)
    return result[0]['generated_text']
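# Phi-3.5-mini-instruct is a chat model, so wrapping the question in the model's
# chat template usually gives better answers than raw text. A minimal sketch
# using the standard transformers API (generateChat is a hypothetical helper):
# def generateChat(inputText, num_tokens=200):
#     messages = [{"role": "user", "content": inputText}]
#     prompt = tokenizer.apply_chat_template(messages, tokenize=False,
#                                            add_generation_prompt=True)
#     pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer,
#                     max_new_tokens=num_tokens)
#     return pipe(prompt)[0]['generated_text']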
title = "Phi-3.5 instruct model fine-tuned on the OpenAssistant dataset with QLoRA"
description = "Phi-3.5 instruct model fine-tuned on the OpenAssistant dataset with QLoRA. This Space runs on CPU, so generation is slow; please be patient after submitting a request, as a response can take 15 to 20 minutes."
examples = [
    ["How can I optimize my web page for online search so that it ranks at the top?", 200],
    ["Can you give me an example Python script for the Fibonacci series?", 200],
    ["Can you explain what Contrastive Loss is in Deep Learning?", 200],
    ["How are Sentence Transformers different from Hugging Face Transformers?", 200],
]
demo = gr.Interface(
    generateText,
    inputs=[
        gr.Textbox(label="Question that you want to ask"),
        gr.Slider(100, 500, value=200, step=100, label="Number of tokens that you want in your output"),
    ],
    outputs=[
        gr.Text(label="Generated text"),
    ],
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
)
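# With responses taking this long on CPU, enabling Gradio's request queue
# (demo.queue() before launch) may help avoid request timeouts; exact behavior
# depends on the Gradio version.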
demo.launch()