Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,12 +1,11 @@
 import os
-import threading as Thread
 import time
 import spaces
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 
-
+MODEL_LIST = ["internlm/internlm2_5-7b-chat", "internlm/internlm2_5-7b-chat-1m"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL_ID = os.environ.get("MODEL_ID", None)
 MODEL_NAME = MODEL_ID.split("/")[-1]
@@ -44,40 +43,21 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = model.eval()
 
 @spaces.GPU()
-def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
-
-    for prompt, answer in history:
-        conversation.extend([
-            {"role": "user", "content": prompt},
-            {"role": "assistant", "content": answer},
-        ])
-    conversation.append({"role": "user", "content": message})
-
-    print(f"Conversation is -\n{conversation}")
-
-    input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
+def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
+    print(history)
 
-
-
-
-
-
-
-        top_p=top_p,
-        top_k=top_k,
-
-        do_sample=True,
-        temperature=temperature,
-        eos_token_id=[2, 92542],
+    resp, hist = model.stream_chat(
+        tokenizer,
+        query=message,
+        history=history,
+        max_new_tokens=max_new_tokens,
+        do_sample=False if temperature == 0 else True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
     )
-
-    thread = Thread(target=model.generate, kwargs=generate_kwargs)
-    thread.start()
 
-
-    for new_text in streamer:
-        buffer += new_text
-        yield buffer
+    yield resp
 
 
 chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
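
Several of the removed lines above lost their content in the diff rendering and appear as bare `-` markers. From the fragments that survive (`generate_kwargs`, `streamer`, `buffer`, and the orphaned keyword arguments), the old handler evidently followed the standard transformers threaded-streaming pattern: build `input_ids` with the chat template, run `model.generate` on a worker thread, and yield partial text from a `TextIteratorStreamer`. Below is a minimal sketch of what the removed code plausibly looked like; the `conversation = []` initialisation, the streamer construction, the `generate_kwargs = dict(...)` header, and `repetition_penalty=penalty` are assumptions filled in from context. Note also that the removed `import threading as Thread` binds the module rather than the class, so `Thread(target=..., kwargs=...)` would fail at call time; the sketch uses the conventional `from threading import Thread`.

from threading import Thread
from transformers import TextIteratorStreamer

@spaces.GPU()
def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
    conversation = []  # assumed: the removed code must have initialised this list
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # Yields decoded text as model.generate produces tokens on the worker thread.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=penalty,  # assumed: the `penalty` argument is otherwise unused
        eos_token_id=[2, 92542],
    )
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer  # Gradio re-renders the message with each partial buffer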
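
The replacement code relies on the `stream_chat` method that InternLM2's `trust_remote_code` modeling file attaches to the model. As documented in the InternLM README, that method is a generator yielding progressively longer `(response, history)` pairs as tokens arrive, so unpacking it directly with `resp, hist = model.stream_chat(...)` and issuing a single `yield resp` is unlikely to stream incrementally as intended. If the generator behaviour from the README is what this checkpoint provides, the handler would iterate instead; a hedged sketch under that assumption:

@spaces.GPU()
def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
    # stream_chat yields (partial_response, updated_history) pairs
    for resp, hist in model.stream_chat(
        tokenizer,
        query=message,
        history=[tuple(pair) for pair in history],  # Gradio [user, bot] pairs -> tuples
        max_new_tokens=max_new_tokens,
        do_sample=temperature > 0,  # fall back to greedy decoding at temperature 0
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    ):
        yield resp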
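
For completeness, the `chatbot` widget in the trailing context is typically wired to a handler with this signature through `gr.ChatInterface`, with the generation parameters exposed as `additional_inputs` in the same order they appear after `(message, history)`. A sketch of that wiring; the slider ranges and defaults are illustrative assumptions, not values from this commit:

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        additional_inputs=[
            gr.Slider(minimum=0, maximum=1, step=0.1, value=0.8, label="Temperature"),
            gr.Slider(minimum=128, maximum=4096, step=1, value=1024, label="Max new tokens"),
            gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.8, label="top_p"),
            gr.Slider(minimum=1, maximum=60, step=1, value=40, label="top_k"),
            gr.Slider(minimum=1.0, maximum=2.0, step=0.1, value=1.2, label="Repetition penalty"),
        ],
    )

demo.launch()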