matheusrdgsf committed on
Commit dfe6e8f
1 Parent(s): 95444e3

Update README.md

Files changed (1)
  1. README.md +71 -1
README.md CHANGED
@@ -1,5 +1,13 @@
---
library_name: peft
+ base_model: TheBloke/zephyr-7B-beta-GPTQ
+ revision: gptq-8bit-32g-actorder_True
+ license: mit
+ language:
+ - pt
+ tags:
+ - gptq
+ - ptbr
---
## Training procedure

@@ -25,4 +33,66 @@ The following `bitsandbytes` quantization config was used during training:
### Framework versions

- - PEFT 0.5.0
+
+ # Load model with AutoModel
+ ```python
+ from peft import PeftModel, PeftConfig
+ from transformers import AutoModelForCausalLM
+
+ # Adapter configuration (records the base model and revision it was trained on)
+ config = PeftConfig.from_pretrained("matheusrdgsf/cesar-ptbr")
+
+ # GPTQ-quantized base model (8-bit, group size 32, act-order revision)
+ model = AutoModelForCausalLM.from_pretrained(
+     "TheBloke/zephyr-7B-beta-GPTQ",
+     revision="gptq-8bit-32g-actorder_True",
+     device_map='auto',
+ )
+ # Attach the PEFT adapter on top of the quantized base model
+ model = PeftModel.from_pretrained(model, "matheusrdgsf/cesar-ptbr")
+ ```
+
+ # Easy inference
+ ```python
+ import time
+
+ from transformers import AutoTokenizer, GenerationConfig
+
+ # Tokenizer of the GPTQ base model (encoding/decoding) and the zephyr-7b-alpha
+ # tokenizer, which is used only for its chat template
+ tokenizer_model = AutoTokenizer.from_pretrained('TheBloke/zephyr-7B-beta-GPTQ')
+ tokenizer_template = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-alpha')
+
+ generation_config = GenerationConfig(
+     do_sample=True,
+     temperature=0.1,
+     top_p=0.25,
+     top_k=0,
+     max_new_tokens=512,
+     repetition_penalty=1.1,
+     eos_token_id=tokenizer_model.eos_token_id,
+     pad_token_id=tokenizer_model.eos_token_id,
+ )
+
+
+ def get_inference(
+     text,
+     model,
+     tokenizer_model=tokenizer_model,
+     tokenizer_template=tokenizer_template,
+     generation_config=generation_config,
+ ):
+     st_time = time.time()
+     # Build the chat prompt with the Zephyr template. The system prompt (pt-BR)
+     # says: "You are a chatbot for movie recommendations. Politely answer users
+     # with movie suggestions."
+     inputs = tokenizer_model(
+         tokenizer_template.apply_chat_template(
+             [
+                 {
+                     "role": "system",
+                     "content": "Você é um chatbot para indicação de filmes. Responda de maneira educada sugestões de filmes para os usuários.",
+                 },
+                 {"role": "user", "content": text},
+             ],
+             tokenize=False,
+         ),
+         return_tensors="pt",
+     ).to("cuda")
+
+     outputs = model.generate(**inputs, generation_config=generation_config)
+
+     print('inference time:', time.time() - st_time)
+     # Keep only the assistant's reply (last line of the decoded output)
+     return tokenizer_model.decode(outputs[0], skip_special_tokens=True).split('\n')[-1]
+
+
+ # "Could you recommend action movies of up to 2 hours?"
+ get_inference('Poderia indicar filmes de ação de até 2 horas?', model)
+ ```
+
+
+ - PEFT 0.5.0
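
The committed snippet prints the reply only after generation completes. As a purely illustrative extension, not part of the committed README, the sketch below streams tokens as they are produced. It assumes the `model`, `tokenizer_model`, `tokenizer_template`, and `generation_config` objects defined in the added code plus a CUDA device, and uses `transformers.TextStreamer` for incremental decoding.

```python
from transformers import TextStreamer

# Print tokens to stdout as they are generated; skip the prompt and special tokens.
streamer = TextStreamer(tokenizer_model, skip_prompt=True, skip_special_tokens=True)

# Same Zephyr chat template and pt-BR system prompt used by get_inference above.
prompt = tokenizer_template.apply_chat_template(
    [
        {
            "role": "system",
            "content": "Você é um chatbot para indicação de filmes. Responda de maneira educada sugestões de filmes para os usuários.",
        },
        {"role": "user", "content": "Poderia indicar filmes de ação de até 2 horas?"},
    ],
    tokenize=False,
)
inputs = tokenizer_model(prompt, return_tensors="pt").to("cuda")
model.generate(**inputs, generation_config=generation_config, streamer=streamer)
```

Because the card's sampling settings are nearly deterministic (temperature 0.1, top_p 0.25), the streamed text should closely match what `get_inference` returns.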