doubledsbv committed
Commit
5239190
1 Parent(s): 7040f84

Upload 2 files

Files changed (2)
  1. app.py +112 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,112 @@
+import gradio as gr
+import runpod
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+runpod.api_key = os.getenv("RUNPOD_API_KEY")
+endpoint_id = os.getenv("ENDPOINT_ID")
+
+
+def format_prompt(message, history):
+    """Build the chat message list from the conversation history and the new user message."""
+    messages = []
+    # German system prompt: "You are a friendly, helpful chatbot that likes to answer questions correctly and precisely."
+    messages.append({"role": "system", "content": "Du bist ein freundlicher, hilfsbereiter Chatbot, der gerne Fragen korrekt und präzise beantwortet."})
+    for user_prompt, bot_response in history:
+        messages.append({"role": "user", "content": user_prompt})
+        messages.append({"role": "assistant", "content": bot_response})
+    messages.append({"role": "user", "content": message})
+    return messages
+
+
+def generate(
+    message: str,
+    history: list = [],
+    temperature=0.7,
+    max_tokens=1024,
+    top_p=0.95,
+):
+    """Stream a chat completion from the RunPod serverless endpoint."""
+    formatted_messages = format_prompt(message, history)
+
+    temperature = max(float(temperature), 1e-2)
+    top_p = float(top_p)
+    top_k = 50
+    max_tokens = int(max_tokens)
+
+    input_payload = {
+        "input": {
+            "messages": formatted_messages,
+            "sampling_params": {
+                "temperature": temperature,
+                "top_p": top_p,
+                "top_k": top_k,
+                "max_tokens": max_tokens,
+            },
+        }
+    }
+
+    # Call the RunPod serverless endpoint and stream partial results back to the UI.
+    try:
+        endpoint = runpod.Endpoint(endpoint_id)
+        run_request = endpoint.run(input_payload)
+        stream_output = ""
+        # Accumulate tokens as they arrive and yield the growing text so the chat updates live.
+        for output in run_request.stream():
+            for t in output["choices"][0]["tokens"]:
+                stream_output += t.strip()
+                yield stream_output
+        return stream_output
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+
+additional_inputs = [
+    gr.Slider(
+        label="Temperature",
+        value=0.7,
+        minimum=0.0,
+        maximum=1.0,
+        step=0.05,
+        interactive=True,
+        info="Higher values produce more diverse outputs",
+    ),
+    gr.Slider(
+        label="Max new tokens",
+        value=1024,
+        minimum=0,
+        maximum=2048,
+        step=64,
+        interactive=True,
+        info="The maximum number of new tokens",
+    ),
+    gr.Slider(
+        label="Top-p (nucleus sampling)",
+        value=0.90,
+        minimum=0.0,
+        maximum=1.0,
+        step=0.05,
+        interactive=True,
+        info="Higher values sample more low-probability tokens",
+    ),
+]
+
+css = """
+#mkd {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
+}
+"""
+
+with gr.Blocks(css=css) as demo:
+    gr.ChatInterface(
+        generate,
+        additional_inputs=additional_inputs,
+        title="Demo Kafka-7B-DARE-TIES-QLoRa-LaserRMT-DPO",
+        examples=[["Was ist der Sinn des Lebens?"], ["Was ist der Unterschied zwischen Quantenmechanik und Relativitätstheorie?"]],
+    )
+
+demo.queue().launch(max_threads=100, debug=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
+runpod
+python-dotenv
+gradio
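A minimal local-run sketch: app.py loads RUNPOD_API_KEY and ENDPOINT_ID via python-dotenv, so a .env file next to app.py would look like the lines below (the values are placeholders; an already deployed RunPod serverless endpoint is assumed), after which the requirements can be installed and the app started.

.env
RUNPOD_API_KEY=<your RunPod API key>
ENDPOINT_ID=<your RunPod serverless endpoint ID>

pip install -r requirements.txt
python app.py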