Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -5,6 +5,16 @@ import torch
 import gradio as gr
 from threading import Thread
 
+from huggingface_hub import snapshot_download
+from pathlib import Path
+
+from mistral_inference.transformer import Transformer
+from mistral_inference.generate import generate
+
+from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+from mistral_common.protocol.instruct.messages import AssistantMessage, UserMessage
+from mistral_common.protocol.instruct.request import ChatCompletionRequest
+
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 TITLE = "<h1><center>Mistral-lab</center></h1>"
@@ -15,25 +25,32 @@ PLACEHOLDER = """
 </center>
 """
 
-from huggingface_hub import snapshot_download
-from pathlib import Path
+CSS = """
+.duplicate-button {
+    margin: auto !important;
+    color: white !important;
+    background: black !important;
+    border-radius: 100vh !important;
+}
+h3 {
+    text-align: center;
+}
+"""
 
+
+# download model
 mistral_models_path = Path.home().joinpath('mistral_models', '8B-Instruct')
 mistral_models_path.mkdir(parents=True, exist_ok=True)
 
 snapshot_download(repo_id="mistralai/Ministral-8B-Instruct-2410", allow_patterns=["params.json", "consolidated.safetensors", "tekken.json"], local_dir=mistral_models_path)
 
-from mistral_inference.transformer import Transformer
-from mistral_inference.generate import generate
-
-from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
-from mistral_common.protocol.instruct.messages import AssistantMessage, UserMessage
-from mistral_common.protocol.instruct.request import ChatCompletionRequest
-
-device = "cuda" # for GPU usage or "cpu" for CPU usage
-
+# tokenizer
+device = "cuda" if torch.cuda.is_available() else "cpu" # for GPU usage or "cpu" for CPU usage
 tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tekken.json")
-model = Transformer.from_folder(
+model = Transformer.from_folder(
+    mistral_models_path,
+    device=device,
+    dtype=torch.bfloat16)
 
 
 @spaces.GPU()
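Note on the load sequence above: guarding `device` with `torch.cuda.is_available()` lets the module import cleanly on a CPU host, which matters on ZeroGPU Spaces where the GPU is only attached inside functions decorated with `@spaces.GPU()`. For reference, the downloaded artifacts are consumed in the standard mistral_inference flow; a minimal non-streaming completion using the names imported in this commit (the prompt string is only illustrative) would look roughly like:

# sketch: single completion with the tokenizer/model loaded above
completion_request = ChatCompletionRequest(messages=[UserMessage(content="Say hello.")])
tokens = tokenizer.encode_chat_completion(completion_request).tokens
out_tokens, _ = generate([tokens], model, max_tokens=256, temperature=0.7,
                         eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
result = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])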
@@ -64,12 +81,23 @@ def stream_chat(
                          eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
 
     result = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])
-
-
+
+    for i in range(len(result)):
+        time.sleep(0.05)
+        yield result[: i + 1]
 
-chatbot = gr.Chatbot(
+chatbot = gr.Chatbot(
+    height=600,
+    placeholder=PLACEHOLDER,
+    examples=[
+        {"text": "Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."},
+        {"text": "What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."},
+        {"text": "Tell me a random fun fact about the Roman Empire."},
+        {"text": "Show me a code snippet of a website's sticky header in CSS and JavaScript."},
+    ],
+)
 
-with gr.Blocks(theme="ocean") as demo:
+with gr.Blocks(theme="ocean", css=CSS) as demo:
     gr.HTML(TITLE)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
     gr.ChatInterface(
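The stream in `stream_chat` is simulated: with `generate` as used here, the full output arrives at once, and the loop then yields ever-longer prefixes at 50 ms per character, which `gr.ChatInterface` re-renders as a growing message. (The `from threading import Thread` import at the top hints at an earlier true-streaming variant.) A toy, self-contained sketch of the same prefix-yielding pattern, with illustrative names:

import time
import gradio as gr

def fake_stream(message, history):
    reply = "You said: " + message  # stand-in for a decoded model output
    for i in range(len(reply)):
        time.sleep(0.05)            # pacing only, not real token streaming
        yield reply[: i + 1]        # the UI shows the growing prefix

gr.ChatInterface(fake_stream).launch()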
@@ -95,13 +123,6 @@ with gr.Blocks(theme="ocean") as demo:
                 render=False,
             ),
         ],
-        examples=[
-            ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
-            ["What are 5 creative things I could do with my kids' art? I don't want to throw them away, but it's also so much clutter."],
-            ["Tell me a random fun fact about the Roman Empire."],
-            ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
-        ],
-        cache_examples=False,
     )
 
 
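Net effect of the last two hunks: the example prompts move from `gr.ChatInterface(examples=[...], cache_examples=False)`, which takes lists of strings, to the `gr.Chatbot` component itself as dicts with a "text" key, so they now render inside the empty-chat placeholder and the `cache_examples` flag is no longer needed. The diff does not show the end of the file; a Blocks app like this is presumably launched with something like:

# assumption: not shown in the diff, typical closing lines for a Gradio Blocks app
if __name__ == "__main__":
    demo.launch()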