Spaces:

witfoo
/

WitQ

Running on L4

f15hb0wn committed on 8 days ago

Commit

06f0aa8

•

1 Parent(s): 0a79ce5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,17 +1,29 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import gradio as gr
 model_id = "witfoo/witq-1.0"
 dtype = torch.float16 # float16 for Tesla T4, V100, bfloat16 for Ampere+
-tokenizer = AutoTokenizer.from_pretrained(model_id)
 # Use GPU if available
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=dtype,
-    device_map=device,
-)
 preamble = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request."

+## Chatbot for interacting with WitFoo's open-source model with standard Transformers. Can run on GPU or CPU.
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import gradio as gr
 model_id = "witfoo/witq-1.0"
 dtype = torch.float16 # float16 for Tesla T4, V100, bfloat16 for Ampere+
 # Use GPU if available
+# Pick the GPU when CUDA is available, otherwise the CPU. "auto" is a
+# device_map strategy string, not a torch device type, so it must not be
+# passed to torch.device().
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+try:
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=dtype,
+        device_map=device,
+    )
+except Exception:
+    # Only a failed GPU load has somewhere to fall back to; if the CPU load
+    # itself failed, retrying the same call cannot help, so re-raise.
+    if device.type != "cuda":
+        raise
+    print("Failed to load model on GPU. Loading on CPU...")
+    device = torch.device("cpu")
+    # NOTE(review): dtype stays as configured above; float16 may be slow or
+    # unsupported for some CPU ops - confirm whether float32 is wanted here.
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=dtype,
+        device_map=device,
+    )
 preamble = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request."