Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -36,10 +36,9 @@ h3 {
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.bfloat16,
-    device_map='auto',
     low_cpu_mem_usage=True,
     trust_remote_code=True)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)


 def stream_chat(
@@ -125,7 +124,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
         ["Tell me a random fun fact about the Roman Empire."],
         ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
     ],
-    cache_examples=
+    cache_examples="lazy",
 )
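For context, a minimal sketch of how the loading block reads after this commit. The value of MODEL_ID and the surrounding app code are assumptions inferred from the diff, not shown in it; dropping device_map='auto' fits the usual ZeroGPU pattern, where no GPU is attached at startup and one is allocated only inside a @spaces.GPU-decorated handler.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "org/model-id"  # hypothetical placeholder; the real value is defined elsewhere in app.py

# Without device_map='auto', the model is loaded on CPU at startup; on a
# ZeroGPU Space the GPU only exists inside @spaces.GPU-decorated calls.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True)

# torch_dtype is not a tokenizer parameter; tokenizers ignore unknown keyword
# arguments, so this line loads the tokenizer the same way the removed one did.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)

The second hunk switches example caching to "lazy". A sketch of where that lands, assuming the examples belong to a gr.ChatInterface inside the gr.Blocks shown in the hunk header (the intervening code is not part of this diff):

import gradio as gr

with gr.Blocks(theme="soft") as demo:  # CSS omitted in this sketch
    gr.ChatInterface(
        fn=stream_chat,  # streaming handler defined earlier in app.py
        examples=[
            ["Tell me a random fun fact about the Roman Empire."],
            ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
        ],
        # "lazy" caches each example the first time a visitor runs it,
        # instead of generating every example output at startup
        cache_examples="lazy",
    )

Lazy caching is presumably the point here: caching at startup would invoke the model at build time, before a ZeroGPU Space has any GPU attached.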