f15hb0wn committed on
Commit
06f0aa8
1 Parent(s): 0a79ce5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -7
app.py CHANGED
@@ -1,17 +1,29 @@
 
1
  from transformers import AutoTokenizer, AutoModelForCausalLM
2
  import torch
3
  import gradio as gr
4
 
5
  model_id = "witfoo/witq-1.0"
6
  dtype = torch.float16 # float16 for Tesla T4, V100, bfloat16 for Ampere+
7
- tokenizer = AutoTokenizer.from_pretrained(model_id)
8
  # Use GPU if available
9
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
- model = AutoModelForCausalLM.from_pretrained(
11
- model_id,
12
- torch_dtype=dtype,
13
- device_map=device,
14
- )
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  preamble = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request."
 
1
## Chatbot for interacting with WitFoo's opensource model with standard Transformers.
## Can run on GPU or CPU: the model is loaded once at module import, with a CPU
## fallback if GPU loading fails.
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr

model_id = "witfoo/witq-1.0"
dtype = torch.float16  # float16 for Tesla T4, V100; bfloat16 for Ampere+

# Use GPU if available. Keep this a plain string: it is passed to
# `device_map=`, which accepts "cuda" or "auto" ("auto" lets transformers
# place the model, falling back to CPU). NOTE: wrapping it in torch.device()
# would raise RuntimeError on CPU-only machines, since "auto" is not a valid
# torch device type.
device = "cuda" if torch.cuda.is_available() else "auto"

tokenizer = AutoTokenizer.from_pretrained(model_id)
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=dtype,
        device_map=device,
    )
except Exception as exc:  # narrow from bare except: don't swallow Ctrl-C / SystemExit
    if device == "cuda":
        print(f"Failed to load model on GPU ({exc}). Loading on CPU...")
        device = "auto"
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map=device,
        )
    else:
        # Already on the CPU path — retrying with the same settings cannot
        # succeed, so surface the real error instead of masking it.
        raise


preamble = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request."