hyang0503 committed
Commit 3c33c7e
1 Parent(s): 71d980a

Update app.py

Files changed (1)
  1. app.py +34 -33
app.py CHANGED
@@ -1,43 +1,44 @@
 import gradio as gr
 
-# def greet(name):
-#     return "Hello " + name + "!!"
+def greet(name):
+    return "Hello " + name + "!!"
 
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from peft import PeftModel, PeftConfig
 
-class InferenceFineTunning:
-    def __init__(self, model_path):
-        peft_model_id = f"hyang0503/{model_path}"
-        config = PeftConfig.from_pretrained(peft_model_id)
-        bnb_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_compute_dtype=torch.bfloat16
-        )
-        self.model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config, device_map="auto")
-        self.model = PeftModel.from_pretrained(self.model, peft_model_id)
+# class InferenceFineTunning:
+#     def __init__(self, model_path):
+#         peft_model_id = f"hyang0503/{model_path}"
+#         config = PeftConfig.from_pretrained(peft_model_id)
+#         bnb_config = BitsAndBytesConfig(
+#             load_in_4bit=True,
+#             bnb_4bit_use_double_quant=True,
+#             bnb_4bit_quant_type="nf4",
+#             bnb_4bit_compute_dtype=torch.bfloat16
+#         )
+#         self.model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config, device_map="auto")
+#         self.model = PeftModel.from_pretrained(self.model, peft_model_id)
 
-        # self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
-        self.tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
-        self.tokenizer.pad_token = self.tokenizer.eos_token
-        self.model.eval()
+#         # self.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+#         self.tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
+#         self.tokenizer.pad_token = self.tokenizer.eos_token
+#         self.model.eval()
 
-    def generate(self, q):  # differs from the practice notebook
-        outputs = self.model.generate(
-            **self.tokenizer(
-                f"### 질문: {q}\n\n### 답변:",
-                return_tensors='pt',
-                return_token_type_ids=False
-            ).to("cuda"),
-            max_new_tokens=256,
-            early_stopping=True,
-            do_sample=True,
-            eos_token_id=2,
-        )
-        print(self.tokenizer.decode(outputs[0]))
-ifg = InferenceFineTunning("qlora-koalpaca")
-iface = gr.Interface(fn=ifg.generate, inputs="text", outputs="text")
+#     def generate(self, q):  # differs from the practice notebook
+#         outputs = self.model.generate(
+#             **self.tokenizer(
+#                 f"### 질문: {q}\n\n### 답변:",
+#                 return_tensors='pt',
+#                 return_token_type_ids=False
+#             ).to("cuda"),
+#             max_new_tokens=256,
+#             early_stopping=True,
+#             do_sample=True,
+#             eos_token_id=2,
+#         )
+#         print(self.tokenizer.decode(outputs[0]))
+# ifg = InferenceFineTunning("qlora-koalpaca")
+# iface = gr.Interface(fn=ifg.generate, inputs="text", outputs="text")
+iface = gr.Interface(fn=greet, inputs="text", outputs="text")
 iface.launch()
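
A note on the inference path this commit disables: as written, generate() prints the decoded output and returns None, so wiring it into gr.Interface would leave the output textbox empty. The sketch below is a hypothetical corrected version, not part of the commit; it assumes the hyang0503/qlora-koalpaca adapter is reachable on the Hub and a CUDA-capable device is present, and it returns only the text after the "### 답변:" ("Answer:") marker of the KoAlpaca prompt template ("### 질문:" = "Question:").

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftConfig, PeftModel

peft_model_id = "hyang0503/qlora-koalpaca"  # adapter repo used by the commented-out code
config = PeftConfig.from_pretrained(peft_model_id)

# 4-bit NF4 quantization, same settings as the disabled class
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, peft_model_id)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
tokenizer.pad_token = tokenizer.eos_token

def generate(q: str) -> str:
    # Same KoAlpaca prompt template as the original code.
    prompt = f"### 질문: {q}\n\n### 답변:"
    inputs = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False).to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Return (rather than print) only the completion after the answer marker,
    # so the Gradio output box actually displays it.
    return text.split("### 답변:")[-1].strip()

iface = gr.Interface(fn=generate, inputs="text", outputs="text")
iface.launch()

Two deliberate changes from the original call: early_stopping=True is dropped, since it only applies to beam search and recent transformers versions warn when it is combined with do_sample=True; and the hard-coded eos_token_id=2 is replaced with tokenizer.eos_token_id, which is equivalent whenever the tokenizer's EOS id is in fact 2.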