Hugging Face Spaces — Running on Zero
Commit: "fix tensors" (Browse files)
File changed: app.py
@@ -39,15 +39,17 @@ def generate_response(message, history, system_message, max_tokens, temperature,
Before (app.py, lines 39–53):

    39          response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]['generated_text']
    40      else:
    41          inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True)
    42 -        [removed line — content truncated in page extraction]
    43          with torch.no_grad():
    44              output_ids = model.generate(
    45 -            [removed line — content truncated in page extraction]
    46                  max_new_tokens=max_tokens,
    47                  temperature=temperature,
    48                  top_p=top_p,
    49                  do_sample=True,
    50 -                attention_mask=[rest of removed line truncated in page extraction]
    51              )
    52
    53          response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
After (app.py, lines 39–55):

    39          response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]['generated_text']
    40      else:
    41          inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True)
    42 +        input_ids = inputs['input_ids'].to(model.device)
    43 +        attention_mask = inputs['attention_mask'].to(model.device)
    44 +
    45          with torch.no_grad():
    46              output_ids = model.generate(
    47 +                input_ids,
    48                  max_new_tokens=max_tokens,
    49                  temperature=temperature,
    50                  top_p=top_p,
    51                  do_sample=True,
    52 +                attention_mask=attention_mask
    53              )
    54
    55          response = tokenizer.decode(output_ids[0], skip_special_tokens=True)