matheusrdgsf committed on
Commit
e2db78d
1 Parent(s): e18bb66

update adapter

Files changed (2)
  1. README.md +71 -0
  2. adapter_config.json +1 -1
README.md CHANGED
@@ -1,5 +1,6 @@
---
library_name: peft
+ pipeline_tag: text-generation
---
## Training procedure

@@ -24,5 +25,75 @@ The following `bitsandbytes` quantization config was used during training:
- max_input_length: None
### Framework versions

+ # Load model
+ ```python
+ from transformers import AutoModelForCausalLM, GPTQConfig
+ from peft import PeftModel
+
+ # GPTQ settings matching the quantized base model; the exllama kernel is
+ # disabled because it only supports 4-bit weights.
+ gptq_config = GPTQConfig(
+     bits=8,
+     disable_exllama=True,
+ )
+
+ # Load the 8-bit GPTQ base model.
+ _model = AutoModelForCausalLM.from_pretrained(
+     'TheBloke/zephyr-7B-beta-GPTQ',
+     quantization_config=gptq_config,
+     device_map='auto',
+     revision='gptq-8bit-32g-actorder_True',
+ )
+
+ # Attach the LoRA adapter on top of the quantized base model.
+ model = PeftModel.from_pretrained(_model, 'matheusrdgsf/cesar-ptbr')
+ ```
+
+ # Easy inference
+ ```python
+ import time
+
+ from transformers import AutoTokenizer, GenerationConfig
+
+ # Tokenizer of the GPTQ base model, used for encoding and decoding.
+ tokenizer_model = AutoTokenizer.from_pretrained('TheBloke/zephyr-7B-beta-GPTQ')
+ # Tokenizer that provides the Zephyr chat template.
+ tokenizer_template = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-alpha')
+
+ generation_config = GenerationConfig(
+     do_sample=True,
+     temperature=0.1,
+     top_p=0.25,
+     top_k=0,
+     max_new_tokens=512,
+     repetition_penalty=1.1,
+     eos_token_id=tokenizer_model.eos_token_id,
+     pad_token_id=tokenizer_model.eos_token_id,
+ )
+
+
+ def get_inference(
+     text,
+     model,
+     tokenizer_model=tokenizer_model,
+     tokenizer_template=tokenizer_template,
+     generation_config=generation_config,
+ ):
+     st_time = time.time()
+     # Format the system prompt and user message with the Zephyr chat template.
+     # System prompt: "You are a chatbot for movie recommendations. Politely
+     # reply to users with movie suggestions."
+     inputs = tokenizer_model(
+         tokenizer_template.apply_chat_template(
+             [
+                 {
+                     "role": "system",
+                     "content": "Você é um chatbot para indicação de filmes. Responda de maneira educada sugestões de filmes para os usuários.",
+                 },
+                 {"role": "user", "content": text},
+             ],
+             tokenize=False,
+         ),
+         return_tensors="pt",
+     ).to("cuda")
+
+     outputs = model.generate(**inputs, generation_config=generation_config)
+
+     print('inference time:', time.time() - st_time)
+     return tokenizer_model.decode(outputs[0], skip_special_tokens=True).split('\n')[-1]
+
+ # "Could you recommend action movies of up to 2 hours?"
+ get_inference('Poderia indicar filmes de ação de até 2 horas?', model)
+ ```
+

- PEFT 0.5.0
adapter_config.json CHANGED
@@ -12,7 +12,7 @@
"modules_to_save": null,
"peft_type": "LORA",
"r": 16,
- "revision": null,
+ "revision": "gptq-8bit-32g-actorder_True",
"target_modules": [
"q_proj",
"v_proj"