# --- Earlier, commented-out versions kept for reference ---
# import gradio as gr
#
# def greet(name):
#     return "Hello " + name + "!!"
#
# import torch
# from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
# from peft import PeftModel, PeftConfig
#
# class InferenceFineTunning:
#     def __init__(self, model_path):
#         peft_model_id = f"hyang0503/{model_path}"
#         config = PeftConfig.from_pretrained(peft_model_id)
#         bnb_config = BitsAndBytesConfig(
#             load_in_4bit=True,
#             bnb_4bit_use_double_quant=True,
#             bnb_4bit_quant_type="nf4",
#             bnb_4bit_compute_dtype=torch.bfloat16,
#         )
#         self.model = AutoModelForCausalLM.from_pretrained(
#             config.base_model_name_or_path,
#             quantization_config=bnb_config,
#             device_map="auto",
#         )
#         self.model = PeftModel.from_pretrained(self.model, peft_model_id)
#         # self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
#         self.tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
#         self.tokenizer.pad_token = self.tokenizer.eos_token
#         self.model.eval()
#
#     def generate(self, q):  # differs from the practice notebook
#         outputs = self.model.generate(
#             **self.tokenizer(
#                 f"### 질문: {q}\n\n### 답변:",
#                 return_tensors="pt",
#                 return_token_type_ids=False,
#             ).to("cuda"),
#             max_new_tokens=256,
#             early_stopping=True,
#             do_sample=True,
#             eos_token_id=2,
#         )
#         print(self.tokenizer.decode(outputs[0]))
#
# ifg = InferenceFineTunning("qlora-koalpaca")
# iface = gr.Interface(fn=ifg.generate, inputs="text", outputs="text")
# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# iface.launch()

import torch
import gradio as gr
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the base model, then attach the QLoRA adapter weights on top of it.
peft_model_id = "hyang0503/qlora-koalpaca"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(model, peft_model_id).to(device)
model.eval()
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

ANSWER_MARKER = "### 답변:"


def generate(q):
    # Build the KoAlpaca-style prompt and move the input tensors to the model's device.
    inputs = tokenizer(
        f"### 질문: {q}\n\n{ANSWER_MARKER}",
        return_tensors="pt",
        return_token_type_ids=False,
    )
    outputs = model.generate(
        **{k: v.to(device) for k, v in inputs.items()},
        max_new_tokens=256,
        do_sample=True,
        eos_token_id=2,
    )
    # Decode the full sequence and return only the text after the answer marker.
    result = tokenizer.decode(outputs[0])
    answer_idx = result.find(ANSWER_MARKER)
    return result[answer_idx + len(ANSWER_MARKER):].strip()


gr.Interface(fn=generate, inputs="text", outputs="text").launch(share=True)