Stop generation at `\nHuman` and `\n-----` in chat mode
app.py CHANGED
```diff
@@ -89,6 +89,8 @@ def generate(prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition
         do_sample=True,
         seed=42,
     )
+    if chat_mode:
+        generate_kwargs.update({"stop_sequences": ["\nHuman", "\n-----"]})
 
     if chat_mode and FIM_INDICATOR in prompt:
         raise ValueError("Chat mode and FIM are mutually exclusive. Choose one or the other.")
@@ -114,11 +116,15 @@ def generate(prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition
     else:
         output = prompt
 
+    previous_token = ""
     for response in stream:
         if fim_mode and response.token.text =="<|endoftext|>":
             output += (suffix + "\n" + response.token.text)
+        elif chat_mode and response.token.text in ["Human", "-----"] and previous_token=="\n":
+            return output
         else:
             output += response.token.text
+            previous_token = response.token.text
         yield output
     return output
```
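For reference, here is a minimal, runnable sketch of the streaming-side check this commit adds, isolated from the rest of `app.py`. The `Token`/`Response` dataclasses and the `stream_with_stop` helper are illustrative stand-ins, not part of the Space or of the text-generation client; only the `response.token.text` attribute actually used above is modeled. The point it demonstrates: the server-side `stop_sequences` kwarg asks the backend to halt at the marker, but the marker typically arrives split across two streamed tokens (`"\n"`, then `"Human"`), so the client also tracks the previous token to cut the stream before the marker reaches the UI.

```python
from dataclasses import dataclass

# Illustrative stand-ins for the streaming response objects; only the
# .token.text attribute used by app.py is modeled here.
@dataclass
class Token:
    text: str

@dataclass
class Response:
    token: Token

def stream_with_stop(stream, chat_mode=True):
    """Yield accumulated output, stopping before a '\\nHuman' or '\\n-----' marker."""
    output = ""
    previous_token = ""
    for response in stream:
        text = response.token.text
        # The stop marker usually arrives split across two tokens
        # ("\n" first, then "Human"), so match against the previous
        # token instead of looking for the full string in one piece.
        if chat_mode and text in ["Human", "-----"] and previous_token == "\n":
            return output  # mirrors app.py; plain iteration ignores the value
        output += text
        previous_token = text
        yield output

# The model finishes its answer, then starts hallucinating the next human turn.
fake_stream = [Response(Token(t)) for t in ["Hi", "!", "\n", "Human", ":", " bye"]]
for partial in stream_with_stop(iter(fake_stream)):
    print(repr(partial))  # 'Hi', 'Hi!', 'Hi!\n' -- then the stream stops
```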