hyang0503 committed
Commit 05ac0b1
1 Parent(s): 3c33c7e

Update app.py

Files changed (1)
  1. app.py +68 -39
app.py CHANGED
@@ -1,44 +1,73 @@
- import gradio as gr
 
- def greet(name):
-     return "Hello " + name + "!!"
 
  import torch
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
  from peft import PeftModel, PeftConfig
 
- # class InferenceFineTunning:
- #     def __init__(self, model_path):
- #         peft_model_id = f"hyang0503/{model_path}"
- #         config = PeftConfig.from_pretrained(peft_model_id)
- #         bnb_config = BitsAndBytesConfig(
- #             load_in_4bit=True,
- #             bnb_4bit_use_double_quant=True,
- #             bnb_4bit_quant_type="nf4",
- #             bnb_4bit_compute_dtype=torch.bfloat16
- #         )
- #         self.model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config, device_map="auto")
- #         self.model = PeftModel.from_pretrained(self.model, peft_model_id)
-
- #         # self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
- #         self.tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
- #         self.tokenizer.pad_token = self.tokenizer.eos_token
- #         self.model.eval()
-
- #     def generate(self, q):  # differs from the hands-on notebook
- #         outputs = self.model.generate(
- #             **self.tokenizer(
- #                 f"### 질문: {q}\n\n### 답변:",
- #                 return_tensors='pt',
- #                 return_token_type_ids=False
- #             ).to("cuda"),
- #             max_new_tokens=256,
- #             early_stopping=True,
- #             do_sample=True,
- #             eos_token_id=2,
- #         )
- #         print(self.tokenizer.decode(outputs[0]))
- # ifg = InferenceFineTunning("qlora-koalpaca")
- # iface = gr.Interface(fn=ifg.generate, inputs="text", outputs="text")
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
- iface.launch()
 
+ # import gradio as gr
+
+ # def greet(name):
+ #     return "Hello " + name + "!!"
+
+ # import torch
+ # from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ # from peft import PeftModel, PeftConfig
+
+ # # class InferenceFineTunning:
+ # #     def __init__(self, model_path):
+ # #         peft_model_id = f"hyang0503/{model_path}"
+ # #         config = PeftConfig.from_pretrained(peft_model_id)
+ # #         bnb_config = BitsAndBytesConfig(
+ # #             load_in_4bit=True,
+ # #             bnb_4bit_use_double_quant=True,
+ # #             bnb_4bit_quant_type="nf4",
+ # #             bnb_4bit_compute_dtype=torch.bfloat16
+ # #         )
+ # #         self.model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config, device_map="auto")
+ # #         self.model = PeftModel.from_pretrained(self.model, peft_model_id)
+
+ # #         # self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+ # #         self.tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
+ # #         self.tokenizer.pad_token = self.tokenizer.eos_token
+ # #         self.model.eval()
+
+ # #     def generate(self, q):  # differs from the hands-on notebook
+ # #         outputs = self.model.generate(
+ # #             **self.tokenizer(
+ # #                 f"### 질문: {q}\n\n### 답변:",
+ # #                 return_tensors='pt',
+ # #                 return_token_type_ids=False
+ # #             ).to("cuda"),
+ # #             max_new_tokens=256,
+ # #             early_stopping=True,
+ # #             do_sample=True,
+ # #             eos_token_id=2,
+ # #         )
+ # #         print(self.tokenizer.decode(outputs[0]))
+ # # ifg = InferenceFineTunning("qlora-koalpaca")
+ # # iface = gr.Interface(fn=ifg.generate, inputs="text", outputs="text")
+ # iface = gr.Interface(fn=greet, inputs="text", outputs="text")
+ # iface.launch()
 
  import torch
+ import gradio as gr
+
  from peft import PeftModel, PeftConfig
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
+ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+ peft_model_id = "hyang0503/qlora-koalpaca"
+ config = PeftConfig.from_pretrained(peft_model_id)
+ model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
+ model = PeftModel.from_pretrained(model, peft_model_id).to(device)
+ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+
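+ # Build the KoAlpaca-style prompt, sample up to 256 new tokens, and return only the generated answer.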
+ def generate(q):
+     inputs = tokenizer(f"### 질문: {q}\n\n### 답변:", return_tensors='pt', return_token_type_ids=False)
+     outputs = model.generate(
+         **{k: v.to(device) for k, v in inputs.items()},
+         max_new_tokens=256,
+         do_sample=True,
+         eos_token_id=2,
+     )
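+     # Keep only the text that follows the "### 답변:" marker (7 characters long).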
+     result = tokenizer.decode(outputs[0])
+     answer_idx = result.find("### 답변:")
+     answer = result[answer_idx + 7:].strip()
+     return answer
+
+ gr.Interface(generate, "text", "text").launch(share=True)