codelion committed on
Commit
e1ed375
1 Parent(s): 5084686

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -23
app.py CHANGED
@@ -7,8 +7,8 @@ import spaces
7
  import torch
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
9
 
10
- MAX_MAX_NEW_TOKENS = 2048
11
- DEFAULT_MAX_NEW_TOKENS = 1024
12
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
13
 
14
  DESCRIPTION = """\
@@ -31,16 +31,16 @@ if torch.cuda.is_available():
31
  tokenizer.use_default_system_prompt = False
32
 
33
 
34
- @spaces.GPU
35
  def generate(
36
  message: str,
37
  chat_history: list[tuple[str, str]],
38
  system_prompt: str,
39
  max_new_tokens: int = 1024,
40
- temperature: float = 0.6,
41
- top_p: float = 0.9,
42
- top_k: int = 50,
43
- repetition_penalty: float = 1.2,
44
  ) -> Iterator[str]:
45
  conversation = []
46
  if system_prompt:
@@ -227,28 +227,14 @@ chat_interface = gr.ChatInterface(
227
  minimum=0.1,
228
  maximum=4.0,
229
  step=0.1,
230
- value=0.6,
231
  ),
232
  gr.Slider(
233
  label="Top-p (nucleus sampling)",
234
  minimum=0.05,
235
  maximum=1.0,
236
  step=0.05,
237
- value=0.9,
238
- ),
239
- gr.Slider(
240
- label="Top-k",
241
- minimum=1,
242
- maximum=1000,
243
- step=1,
244
- value=50,
245
- ),
246
- gr.Slider(
247
- label="Repetition penalty",
248
- minimum=1.0,
249
- maximum=2.0,
250
- step=0.05,
251
- value=1.2,
252
  ),
253
  ],
254
  stop_btn=None,
 
7
  import torch
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
9
 
10
+ MAX_MAX_NEW_TOKENS = 1024
11
+ DEFAULT_MAX_NEW_TOKENS = 512
12
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
13
 
14
  DESCRIPTION = """\
 
31
  tokenizer.use_default_system_prompt = False
32
 
33
 
34
+ @spaces.GPU(duration=120)
35
  def generate(
36
  message: str,
37
  chat_history: list[tuple[str, str]],
38
  system_prompt: str,
39
  max_new_tokens: int = 1024,
40
+ temperature: float = 0.2,
41
+ top_p: float = 0.95,
42
+ # top_k: int = 50,
43
+ # repetition_penalty: float = 1.2,
44
  ) -> Iterator[str]:
45
  conversation = []
46
  if system_prompt:
 
227
  minimum=0.1,
228
  maximum=4.0,
229
  step=0.1,
230
+ value=0.2,
231
  ),
232
  gr.Slider(
233
  label="Top-p (nucleus sampling)",
234
  minimum=0.05,
235
  maximum=1.0,
236
  step=0.05,
237
+ value=0.95,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  ),
239
  ],
240
  stop_btn=None,