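# Commented-out earlier code below: a minimal Gradio "greet" demo and a 4-bit
# (bitsandbytes NF4) QLoRA inference class.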
# import gradio as gr
# def greet(name):
#     return "Hello " + name + "!!"
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# from peft import PeftModel, PeftConfig
# # class InferenceFineTunning:
# #     def __init__(self, model_path):
# #         peft_model_id = f"hyang0503/{model_path}"
# #         config = PeftConfig.from_pretrained(peft_model_id)
# #         bnb_config = BitsAndBytesConfig(
# #             load_in_4bit=True,
# #             bnb_4bit_use_double_quant=True,
# #             bnb_4bit_quant_type="nf4",
# #             bnb_4bit_compute_dtype=torch.bfloat16
# #         )
# #         self.model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config, device_map="auto")
# #         self.model = PeftModel.from_pretrained(self.model, peft_model_id)
# #         # self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# #         self.tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# #         self.tokenizer.pad_token = self.tokenizer.eos_token
# #         self.model.eval()
# #     def generate(self, q):  # differs from the practice notebook
# #         outputs = self.model.generate(
# #             **self.tokenizer(
# #                 f"### 질문: {q}\n\n### 답변:",
# #                 return_tensors='pt',
# #                 return_token_type_ids=False
# #             ).to("cuda"),
# #             max_new_tokens=256,
# #             early_stopping=True,
# #             do_sample=True,
# #             eos_token_id=2,
# #         )
# #         print(self.tokenizer.decode(outputs[0]))
# # ifg = InferenceFineTunning("qlora-koalpaca")
# # iface = gr.Interface(fn=ifg.generate, inputs="text", outputs="text")
# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# iface.launch()
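# Current app: load the base model, apply the "hyang0503/qlora-koalpaca" PEFT adapter,
# and serve it through a Gradio interface.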
import torch
import gradio as gr
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

peft_model_id = "hyang0503/qlora-koalpaca"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_id).to(device)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
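# Build the prompt in the "### 질문: ... ### 답변:" format, sample a completion,
# and return only the text after the answer marker.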
def generate(q):
    inputs = tokenizer(f"### 질문: {q}\n\n### 답변:", return_tensors='pt', return_token_type_ids=False)
    outputs = model.generate(
        **{k: v.to(device) for k, v in inputs.items()},  # move input tensors to the model's device
        max_new_tokens=256,
        do_sample=True,
        eos_token_id=2,
    )
    result = tokenizer.decode(outputs[0])
    answer_idx = result.find("### 답변:")
    answer = result[answer_idx + 7:].strip()  # skip the 7-character "### 답변:" marker
    return answer
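# Text-in/text-out Gradio UI; share=True also creates a temporary public link.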
gr.Interface(generate, "text", "text").launch(share=True)