patched-chat

Running on Zero

codelion committed on Apr 16

Commit

e1ed375

•

1 Parent(s): 5084686

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,8 +7,8 @@ import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-MAX_MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 DESCRIPTION = """\
@@ -31,16 +31,16 @@ if torch.cuda.is_available():
     tokenizer.use_default_system_prompt = False
-@spaces.GPU
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int = 1024,
-    temperature: float = 0.6,
-    top_p: float = 0.9,
-    top_k: int = 50,
-    repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
     if system_prompt:
@@ -227,28 +227,14 @@ chat_interface = gr.ChatInterface(
             minimum=0.1,
             maximum=4.0,
             step=0.1,
-            value=0.6,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
             minimum=0.05,
             maximum=1.0,
             step=0.05,
-            value=0.9,
-        ),
-        gr.Slider(
-            label="Top-k",
-            minimum=1,
-            maximum=1000,
-            step=1,
-            value=50,
-        ),
-        gr.Slider(
-            label="Repetition penalty",
-            minimum=1.0,
-            maximum=2.0,
-            step=0.05,
-            value=1.2,
         ),
     ],
     stop_btn=None,

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+MAX_MAX_NEW_TOKENS = 1024
+DEFAULT_MAX_NEW_TOKENS = 512
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 DESCRIPTION = """\
     tokenizer.use_default_system_prompt = False
+@spaces.GPU(duration=120)
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int = 1024,
+    temperature: float = 0.2,
+    top_p: float = 0.95,
+    # top_k: int = 50,
+    # repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = []
     if system_prompt:
             minimum=0.1,
             maximum=4.0,
             step=0.1,
+            value=0.2,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
             minimum=0.05,
             maximum=1.0,
             step=0.05,
+            value=0.95,
         ),
     ],
     stop_btn=None,