Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1,15 +1,16 @@
 import os
 import time
-
+import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
 
-MODEL_LIST = ["
+MODEL_LIST = ["mistralai/Mistral-Nemo-Instruct-2407"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+MODEL = os.environ.get("MODEL_ID")
 
-TITLE = "<h1><center>
+TITLE = "<h1><center>Mistral-Nemo</center></h1>"
 
 PLACEHOLDER = """
 <center>
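A note on the new configuration: MODEL is read from the MODEL_ID Space variable with no default, so an unset variable sends None into from_pretrained() and crashes the Space at startup, which is one plausible source of the "Runtime error" status above. A minimal hardening sketch, assuming the pinned repo is an acceptable fallback (this guard is not in the commit):

import os

MODEL_LIST = ["mistralai/Mistral-Nemo-Instruct-2407"]
# Hypothetical guard, not in the commit: fall back to the pinned repo
# instead of passing None to from_pretrained() when MODEL_ID is unset.
MODEL = os.environ.get("MODEL_ID", MODEL_LIST[0])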
@@ -30,21 +31,12 @@ h3 {
 }
 """
 
-#
-from transformers import AutoModelForCausalLM, AutoTokenizer
+device = "cuda" # for GPU usage or "cpu" for CPU usage
 
-
+tokenizer = AutoTokenizer.from_pretrained(MODEL)
+model = AutoModelForCausalLM.from_pretrained(MODEL).to(device)
 
-tokenizer0 = AutoTokenizer.from_pretrained(MODEL_LIST[0])
-model0 = AutoModelForCausalLM.from_pretrained(MODEL_LIST[0]).to(device)
-
-tokenizer1 = AutoTokenizer.from_pretrained(MODEL_LIST[1])
-model1 = AutoModelForCausalLM.from_pretrained(MODEL_LIST[1]).to(device)
-
-tokenizer2 = AutoTokenizer.from_pretrained(MODEL_LIST[2])
-model2 = AutoModelForCausalLM.from_pretrained(MODEL_LIST[2]).to(device)
-
-#@spaces.GPU()
+@spaces.GPU()
 def stream_chat(
     message: str,
     history: list,
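The switch from three locally selected checkpoints to one env-configured model, loaded once at import and decorated with @spaces.GPU(), is the standard ZeroGPU pattern: the model is prepared on the main process and a GPU is attached only while the decorated function runs. A minimal self-contained sketch of that pattern, assuming a ZeroGPU Space (the fallback repo id and dtype here are illustrative):

import os
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL = os.environ.get("MODEL_ID", "mistralai/Mistral-Nemo-Instruct-2407")  # fallback is illustrative

# Loaded once at import; on ZeroGPU, .to("cuda") is allowed here and the
# real device allocation is deferred until a decorated call runs.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForCausalLM.from_pretrained(MODEL, torch_dtype=torch.bfloat16).to("cuda")

@spaces.GPU()  # a GPU is held only for the duration of each call
def complete(prompt: str) -> str:
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    output = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(output[0], skip_special_tokens=True)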
@@ -53,7 +45,6 @@ def stream_chat(
     top_p: float = 1.0,
     top_k: int = 20,
     penalty: float = 1.2,
-    choice: str = "135M"
 ):
     print(f'message: {message}')
     print(f'history: {history}')
@@ -67,16 +58,6 @@ def stream_chat(
 
     conversation.append({"role": "user", "content": message})
 
-    if choice == "1.7B":
-        tokenizer = tokenizer0
-        model = model0
-    elif choice == "135M":
-        model = model1
-        tokenizer = tokenizer1
-    else:
-        model = model2
-        tokenizer = tokenizer2
-
     input_text=tokenizer.apply_chat_template(conversation, tokenize=False)
     inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
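The diff's context stops at the streamer; the remainder of stream_chat is unchanged by this commit and not shown. For reference, a TextIteratorStreamer is normally driven by running generate on a background thread and yielding from the streamer, roughly as in the sketch below (generation kwargs beyond the parameters visible above are assumptions):

from threading import Thread

generate_kwargs = dict(
    input_ids=inputs,
    streamer=streamer,
    do_sample=True,
    top_p=top_p,
    top_k=top_k,
    repetition_penalty=penalty,
    max_new_tokens=1024,  # illustrative cap; the real app likely takes this from a slider
)
thread = Thread(target=model.generate, kwargs=generate_kwargs)
thread.start()

buffer = ""
for new_text in streamer:  # blocks until the generate thread produces the next chunk
    buffer += new_text
    yield buffer  # Gradio renders the growing reply as it streams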
@@ -154,12 +135,6 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
|
|
154 |
label="Repetition penalty",
|
155 |
render=False,
|
156 |
),
|
157 |
-
gr.Radio(
|
158 |
-
["135M", "360M", "1.7B"],
|
159 |
-
value="135M",
|
160 |
-
label="Load Model",
|
161 |
-
render=False,
|
162 |
-
),
|
163 |
],
|
164 |
examples=[
|
165 |
["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
|
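With the model fixed by MODEL_ID, the "Load Model" radio is dropped from additional_inputs along with the matching choice parameter in stream_chat, so the remaining widgets still line up positionally with the function signature. The enclosing call sits outside this diff; a sketch of how such a list typically feeds gr.ChatInterface (the widget set, ranges, and values here are illustrative, not the app's actual ones):

import gradio as gr

chat = gr.ChatInterface(
    fn=stream_chat,
    additional_inputs=[
        # Passed to stream_chat positionally after (message, history),
        # so this list must mirror the remaining keyword parameters in order.
        gr.Slider(0.1, 1.0, value=0.8, label="Temperature", render=False),
        gr.Slider(128, 4096, value=1024, step=1, label="Max new tokens", render=False),
        gr.Slider(0.0, 1.0, value=1.0, label="top_p", render=False),
        gr.Slider(1, 50, value=20, step=1, label="top_k", render=False),
        gr.Slider(1.0, 2.0, value=1.2, label="Repetition penalty", render=False),
    ],
)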