f15hb0wn committed on
Commit
9f4caa4
1 Parent(s): 06f0aa8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -18
app.py CHANGED
@@ -5,26 +5,15 @@ import gradio as gr
5
 
6
  model_id = "witfoo/witq-1.0"
7
  dtype = torch.float16 # float16 for Tesla T4, V100, bfloat16 for Ampere+
8
- # Use GPU if available
9
- device = torch.device("cuda" if torch.cuda.is_available() else "auto")
10
 
11
- tokenizer = AutoTokenizer.from_pretrained(model_id)
12
- try:
13
- model = AutoModelForCausalLM.from_pretrained(
14
- model_id,
15
- torch_dtype=dtype,
16
- device_map=device,
17
- )
18
- except:
19
- if device == "cuda":
20
- print("Failed to load model on GPU. Loading on CPU...")
21
- device = "auto"
22
- model = AutoModelForCausalLM.from_pretrained(
23
- model_id,
24
- torch_dtype=dtype,
25
- device_map=device,
26
- )
27
 
 
 
 
 
 
 
28
 
29
  preamble = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request."
30
 
 
5
 
6
  model_id = "witfoo/witq-1.0"
7
  dtype = torch.float16 # float16 for Tesla T4, V100, bfloat16 for Ampere+
 
 
8
 
9
+ device = "auto"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
12
+ model = AutoModelForCausalLM.from_pretrained(
13
+ model_id,
14
+ torch_dtype=dtype,
15
+ device_map=device,
16
+ )
17
 
18
  preamble = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request."
19