Hugging Face Spaces — Running on Zero
Commit: "fix tensors" (Browse files)
File changed: app.py
@@ -39,15 +39,17 @@ def generate_response(message, history, system_message, max_tokens, temperature,
Before (app.py, lines 39–53):

    39          response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]['generated_text']
    40      else:
    41          inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True)
    42 -        [removed line — content truncated in page extraction]
    43          with torch.no_grad():
    44              output_ids = model.generate(
    45 -            [removed line — content truncated in page extraction]
    46                  max_new_tokens=max_tokens,
    47                  temperature=temperature,
    48                  top_p=top_p,
    49                  do_sample=True,
    50 -                attention_mask=[rest of removed line truncated in page extraction]
    51              )
    52
    53          response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
After (app.py, lines 39–55):

    39          response = pipe(full_prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]['generated_text']
    40      else:
    41          inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True)
    42 +        input_ids = inputs['input_ids'].to(model.device)
    43 +        attention_mask = inputs['attention_mask'].to(model.device)
    44 +
    45          with torch.no_grad():
    46              output_ids = model.generate(
    47 +                input_ids,
    48                  max_new_tokens=max_tokens,
    49                  temperature=temperature,
    50                  top_p=top_p,
    51                  do_sample=True,
    52 +                attention_mask=attention_mask
    53              )
    54
    55          response = tokenizer.decode(output_ids[0], skip_special_tokens=True)