zyliu committed on
Commit
148979f
1 Parent(s): aa347cd

update app.py and model_worker.py

Browse files
Files changed (2) hide show
  1. app.py +853 -109
  2. model_worker.py +0 -3
app.py CHANGED
@@ -1,116 +1,860 @@
1
- import fire
2
- import subprocess
 
 
 
3
  import os
4
  import time
5
- import signal
6
- import subprocess
7
- import atexit
8
-
9
-
10
- def kill_processes_by_cmd_substring(cmd_substring):
11
- # execute `ps -ef` and obtain its output
12
- result = subprocess.run(["ps", "-ef"], stdout=subprocess.PIPE, text=True)
13
- lines = result.stdout.splitlines()
14
-
15
- # visit each line
16
- for line in lines:
17
- if cmd_substring in line:
18
- # extract PID
19
- parts = line.split()
20
- pid = int(parts[1])
21
- print(f"Killing process with PID: {pid}, CMD: {line}")
22
- os.kill(pid, signal.SIGTERM)
23
-
24
-
25
- def main(
26
- python_path="python",
27
- run_controller=True,
28
- run_worker=True,
29
- run_gradio=True,
30
- controller_port=10086,
31
- gradio_port=7860,
32
- worker_names=[
33
- "OpenGVLab/InternVL2-8B",
34
- ],
35
- run_sd_worker=False,
36
- **kwargs,
37
- ):
38
- host = "http://0.0.0.0"
39
- controller_process = None
40
- if run_controller:
41
- # python controller.py --host 0.0.0.0 --port 10086
42
- cmd_args = [
43
- f"{python_path}",
44
- "controller.py",
45
- "--host",
46
- "0.0.0.0",
47
- "--port",
48
- f"{controller_port}",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  ]
50
- kill_processes_by_cmd_substring(" ".join(cmd_args))
51
- print("Launching controller: ", " ".join(cmd_args))
52
- controller_process = subprocess.Popen(cmd_args)
53
- atexit.register(controller_process.terminate)
54
-
55
- worker_processes = []
56
- if run_worker:
57
- worker_port = 10088
58
- for worker_name in worker_names:
59
- cmd_args = [
60
- f"{python_path}",
61
- "model_worker.py",
62
- "--port",
63
- f"{worker_port}",
64
- "--controller-url",
65
- f"{host}:{controller_port}",
66
- "--model-path",
67
- f"{worker_name}",
68
- "--load-8bit",
69
- ]
70
- kill_processes_by_cmd_substring(" ".join(cmd_args))
71
- print("Launching worker: ", " ".join(cmd_args))
72
- worker_process = subprocess.Popen(cmd_args)
73
- worker_processes.append(worker_process)
74
- atexit.register(worker_process.terminate)
75
- worker_port += 1
76
-
77
- time.sleep(60)
78
- gradio_process = None
79
- if run_gradio:
80
- # python gradio_web_server.py --port 10088 --controller-url http://0.0.0.0:10086
81
- cmd_args = [
82
- f"{python_path}",
83
- "gradio_web_server.py",
84
- "--port",
85
- f"{gradio_port}",
86
- "--controller-url",
87
- f"{host}:{controller_port}",
88
- "--model-list-mode",
89
- "reload",
90
  ]
91
- kill_processes_by_cmd_substring(" ".join(cmd_args))
92
- print("Launching gradio: ", " ".join(cmd_args))
93
- gradio_process = subprocess.Popen(cmd_args)
94
- atexit.register(gradio_process.terminate)
95
-
96
- sd_worker_process = None
97
- if run_sd_worker:
98
- # python model_worker.py --port 10088 --controller-address http://
99
- cmd_args = [f"{python_path}", "sd_worker.py"]
100
- kill_processes_by_cmd_substring(" ".join(cmd_args))
101
- print("Launching sd_worker: ", " ".join(cmd_args))
102
- sd_worker_process = subprocess.Popen(cmd_args)
103
- atexit.register(sd_worker_process.terminate)
104
-
105
- for worker_process in worker_processes:
106
- worker_process.wait()
107
- if controller_process:
108
- controller_process.wait()
109
- if gradio_process:
110
- gradio_process.wait()
111
- if sd_worker_process:
112
- sd_worker_process.wait()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
 
115
  if __name__ == "__main__":
116
- fire.Fire(main)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import argparse
3
+ from ast import parse
4
+ import datetime
5
+ import json
6
  import os
7
  import time
8
+ import hashlib
9
+ import re
10
+
11
+ import gradio as gr
12
+ import requests
13
+ import random
14
+ from filelock import FileLock
15
+ from io import BytesIO
16
+ from PIL import Image, ImageDraw, ImageFont
17
+
18
+ from constants import LOGDIR
19
+ from utils import (
20
+ build_logger,
21
+ server_error_msg,
22
+ violates_moderation,
23
+ moderation_msg,
24
+ load_image_from_base64,
25
+ get_log_filename,
26
+ )
27
+ from conversation import Conversation
28
+
29
+ logger = build_logger("gradio_web_server", "gradio_web_server.log")
30
+
31
+ headers = {"User-Agent": "InternVL-Chat Client"}
32
+
33
+ no_change_btn = gr.Button()
34
+ enable_btn = gr.Button(interactive=True)
35
+ disable_btn = gr.Button(interactive=False)
36
+
37
+
38
@spaces.GPU(duration=10)
def make_zerogpu_happy():
    """Do nothing; this is an empty function wrapped in ``spaces.GPU``.

    NOTE(review): presumably kept so the hosting runtime finds at least one
    GPU-decorated function in this module -- confirm before removing.
    """
    return None
41
+
42
+
43
def write2file(path, content):
    """Append ``content`` to ``path`` while holding a lock on ``<path>.lock``.

    Args:
        path: Destination file; it is opened in append mode.
        content: Text to write at the end of the file.
    """
    # The lock is acquired first and released only after the file is closed.
    with FileLock(f"{path}.lock"), open(path, "a") as sink:
        sink.write(content)
48
+
49
+
50
def sort_models(models):
    """Order ``models`` in place for display and return the same list.

    Names equal to ``InternVL-Chat-V1-5`` or starting with
    ``InternVL-Chat-V1-5-`` are placed first; inside each group the names are
    in descending order. The first four entries of the sorted list are then
    shuffled so that the default (first) model varies between calls.

    Args:
        models: List of model-name strings; it is modified in place.

    Returns:
        The same list object, reordered.
    """
    priority_name = "InternVL-Chat-V1-5"

    def custom_sort_key(model_name):
        # 1 marks the preferred family, 0 everything else.
        preferred = model_name == priority_name or model_name.startswith(
            priority_name + "-"
        )
        return (1 if preferred else 0, model_name)

    models.sort(key=custom_sort_key, reverse=True)

    # Slicing and shuffling a list cannot fail (short or empty lists are
    # fine), so no exception handling is needed around this step.
    head = models[:4]
    random.shuffle(head)
    models[:4] = head
    return models
68
+
69
+
70
def get_model_list():
    """Refresh the controller's workers and return the sorted model names.

    Posts to ``/refresh_all_workers`` and then ``/list_models`` on
    ``args.controller_url``; the ``"models"`` entry of the second JSON reply
    is passed through ``sort_models``.

    Returns:
        The list of model names as ordered by ``sort_models``.

    Raises:
        RuntimeError: If the refresh request does not answer with HTTP 200.
    """
    logger.info("Call `get_model_list`")
    ret = requests.post(args.controller_url + "/refresh_all_workers")
    logger.info(f"status_code from `get_model_list`: {ret.status_code}")
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if ret.status_code != 200:
        raise RuntimeError(
            f"refresh_all_workers failed with status code {ret.status_code}"
        )

    ret = requests.post(args.controller_url + "/list_models")
    logger.info(f"status_code from `list_models`: {ret.status_code}")
    models = sort_models(ret.json()["models"])

    logger.info(f"Models (from {args.controller_url}): {models}")
    return models
82
+
83
+
84
+ get_window_url_params = """
85
+ function() {
86
+ const params = new URLSearchParams(window.location.search);
87
+ url_params = Object.fromEntries(params);
88
+ console.log(url_params);
89
+ return url_params;
90
+ }
91
+ """
92
+
93
+
94
def init_state(state=None):
    """Return a brand-new ``Conversation``.

    Args:
        state: Previous conversation, if any. It is accepted for the callers'
            convenience but is not reused or modified.
    """
    # Dropping the local reference would not affect the caller's object, so
    # the previous state is simply ignored.
    return Conversation()
98
+
99
+
100
def find_bounding_boxes(state, response):
    """Draw the boxes referenced in ``response`` on the latest user image.

    ``response`` is scanned for
    ``<ref>label</ref><box>[[x0, y0, x1, y1], ...]</box>`` spans. Each
    coordinate is divided by 1000 and scaled by the image width (x) or height
    (y). Every box is outlined and labelled with its ``<ref>`` text, using one
    random colour per ``<ref>`` span.

    Args:
        state: Conversation object; only
            ``state.get_images(source=state.USER)`` is used.
        response: Text produced by the model.

    Returns:
        An annotated copy of the most recent user image, or ``None`` when the
        response contains no usable box or there is no user image.
    """
    # Function-scope import: the module only imports ``parse`` from ``ast``.
    import ast

    def to_unit_box(raw_box):
        # Convert the first four entries to fractions of the image size;
        # anything that is not a 4-number sequence is rejected.
        try:
            return tuple(float(raw_box[i]) / 1000 for i in range(4))
        except (TypeError, ValueError, IndexError, KeyError):
            return None

    pattern = re.compile(r"<ref>\s*(.*?)\s*</ref>\s*<box>\s*(\[\[.*?\]\])\s*</box>")
    results = []
    for category_name, raw_boxes in pattern.findall(response):
        # The text comes from the model, so it is parsed as a literal and
        # never evaluated as code.
        try:
            parsed = ast.literal_eval(raw_boxes)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            continue
        unit_boxes = [box for box in map(to_unit_box, parsed) if box is not None]
        if unit_boxes:
            results.append((category_name, unit_boxes))
    if not results:
        return None

    user_images = state.get_images(source=state.USER)
    if not user_images:
        return None

    returned_image = user_images[-1].copy()
    width, height = returned_image.size
    draw = ImageDraw.Draw(returned_image)
    # Line width and font depend only on the image size, so compute them once.
    line_width = max(1, int(min(width, height) / 200))
    font = ImageFont.truetype("assets/SimHei.ttf", int(20 * line_width / 2))

    for category_name, unit_boxes in results:
        random_color = (
            random.randint(0, 128),
            random.randint(0, 128),
            random.randint(0, 128),
        )
        text_size = font.getbbox(category_name)
        text_width = text_size[2] - text_size[0]
        text_height = text_size[3] - text_size[1]
        for x0, y0, x1, y1 in unit_boxes:
            box = (
                int(x0 * width),
                int(y0 * height),
                int(x1 * width),
                int(y1 * height),
            )
            draw.rectangle(box, outline=random_color, width=line_width)
            # Put the label just above the box, clamped to the top edge.
            text_position = (box[0], max(0, box[1] - text_height))
            draw.rectangle(
                [
                    text_position,
                    (text_position[0] + text_width, text_position[1] + text_height),
                ],
                fill=random_color,
            )
            draw.text(text_position, category_name, fill="white", font=font)
    return returned_image
155
+
156
+
157
def query_image_generation(response, sd_worker_url, timeout=15):
    """Ask the image worker to render a ``drawing-instruction`` block.

    Args:
        response: Model output text; the first fenced
            ``drawing-instruction`` block is used as the caption.
        sd_worker_url: Base URL of the image worker. A falsy value disables
            image generation.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The generated ``PIL.Image`` object, or ``None`` when generation is
        disabled, no instruction block is present, or the request or image
        decoding fails.
    """
    if not sd_worker_url:
        return None
    match = re.search(r"```drawing-instruction\n(.*?)\n```", response, re.DOTALL)
    if match is None:
        return None

    endpoint = f"{sd_worker_url}/generate_image/"
    payload = {"caption": match.group(1)}
    print("drawing-instruction:", payload)
    try:
        reply = requests.post(endpoint, json=payload, timeout=timeout)
        reply.raise_for_status()  # fail on an unsuccessful HTTP status
        return Image.open(BytesIO(reply.content))
    except (requests.exceptions.RequestException, OSError):
        # A failed image request should not abort the chat turn; the caller
        # already treats ``None`` as "no image".
        logger.exception("image generation request failed")
        return None
172
+
173
+
174
def load_demo(url_params, request: gr.Request = None):
    """Create the initial state and pre-select the model named in the URL.

    Args:
        url_params: Mapping of URL query parameters; a ``"model"`` entry that
            names an entry of the module-level ``models`` list selects it.
        request: Incoming request, used only to log the client address.

    Returns:
        ``(state, dropdown_update)``: a fresh conversation and a
        ``gr.Dropdown`` update for the model selector.
    """
    # Log only when a request is available; the previous inverted check
    # dereferenced ``request`` exactly when it was ``None``.
    if request:
        logger.info(f"load_demo. ip: {request.client.host}. params: {url_params}")

    dropdown_update = gr.Dropdown(visible=True)
    if url_params and "model" in url_params:
        model = url_params["model"]
        if model in models:
            dropdown_update = gr.Dropdown(value=model, visible=True)

    state = init_state()
    return state, dropdown_update
186
+
187
+
188
def load_demo_refresh_model_list(request: gr.Request = None):
    """Create the initial state and reload the model list from the controller.

    Args:
        request: Incoming request, used only to log the client address.

    Returns:
        ``(state, dropdown_update)``: a fresh conversation and a
        ``gr.Dropdown`` update whose choices are the refreshed model list,
        with the first model (or ``""`` when there is none) selected.
    """
    # Log only when a request is available; the previous inverted check
    # dereferenced ``request`` exactly when it was ``None``.
    if request:
        logger.info(f"load_demo. ip: {request.client.host}")
    models = get_model_list()
    state = init_state()
    dropdown_update = gr.Dropdown(
        choices=models, value=models[0] if models else ""
    )
    return state, dropdown_update
197
+
198
+
199
def vote_last_response(state, liked, model_selector, request: gr.Request):
    """Append one JSON line describing a vote to the conversation log.

    Args:
        state: Conversation; its ``dict()`` form is stored in the record.
        liked: Vote value stored under ``"like"``.
        model_selector: Name of the selected model.
        request: Incoming request; the client address is stored under ``"ip"``.
    """
    record = dict(
        tstamp=round(time.time(), 4),
        like=liked,
        model=model_selector,
        state=state.dict(),
        ip=request.client.host,
    )
    log_path = get_log_filename()
    write2file(log_path, json.dumps(record) + "\n")
208
+
209
+
210
def upvote_last_response(state, model_selector, request: gr.Request):
    """Record an upvote for the last response.

    Returns:
        A 4-tuple: an empty, interactive textbox followed by three disabled
        buttons.
    """
    logger.info(f"upvote. ip: {request.client.host}")
    vote_last_response(state, True, model_selector, request)
    return (
        gr.MultimodalTextbox(value=None, interactive=True),
        disable_btn,
        disable_btn,
        disable_btn,
    )
215
+
216
+
217
def downvote_last_response(state, model_selector, request: gr.Request):
    """Record a downvote for the last response.

    Returns:
        A 4-tuple: an empty, interactive textbox followed by three disabled
        buttons.
    """
    logger.info(f"downvote. ip: {request.client.host}")
    vote_last_response(state, False, model_selector, request)
    return (
        gr.MultimodalTextbox(value=None, interactive=True),
        disable_btn,
        disable_btn,
        disable_btn,
    )
222
+
223
+
224
def vote_selected_response(
    state, model_selector, request: gr.Request, data: gr.LikeData
):
    """Log a like/dislike given to one specific chat message.

    Args:
        state: Conversation; its ``dict()`` form is stored in the record.
        model_selector: Name of the selected model.
        request: Incoming request; supplies the client address.
        data: Like event; its ``liked``, ``index`` and ``value`` fields are
            logged, and ``liked``/``index`` are stored in the record.
    """
    client_ip = request.client.host
    logger.info(
        f"Vote: {data.liked}, index: {data.index}, value: {data.value} , ip: {client_ip}"
    )
    record = dict(
        tstamp=round(time.time(), 4),
        like=data.liked,
        index=data.index,
        model=model_selector,
        state=state.dict(),
        ip=request.client.host,
    )
    log_path = get_log_filename()
    write2file(log_path, json.dumps(record) + "\n")
    return None
240
+
241
+
242
def flag_last_response(state, model_selector, request: gr.Request):
    """Record a ``"flag"`` vote for the last response.

    Returns:
        A 4-tuple: an empty, interactive textbox followed by three disabled
        buttons.
    """
    logger.info(f"flag. ip: {request.client.host}")
    vote_last_response(state, "flag", model_selector, request)
    return (
        gr.MultimodalTextbox(value=None, interactive=True),
        disable_btn,
        disable_btn,
        disable_btn,
    )
247
+
248
+
249
def regenerate(state, image_process_mode, request: gr.Request):
    """Clear the last assistant reply so that it can be generated again.

    Args:
        state: Conversation to modify in place.
        image_process_mode: Value stored as the third element of the previous
            user message when that message is a tuple or list.
            NOTE(review): the click handler passes the system-prompt textbox
            for this argument -- confirm that is intended.
        request: Incoming request, used only to log the client address.

    Returns:
        ``(state, chatbot, textbox)`` followed by five disabled buttons.
    """
    logger.info(f"regenerate. ip: {request.client.host}")
    state.update_message(Conversation.ASSISTANT, None, -1)
    prev_human_msg = state.messages[-2]
    if isinstance(prev_human_msg[1], (tuple, list)):
        prev_human_msg[1] = (*prev_human_msg[1][:2], image_process_mode)
    state.skip_next = False
    textbox = gr.MultimodalTextbox(value=None, interactive=True)
    return (state, state.to_gradio_chatbot(), textbox) + (disable_btn,) * 5
259
+
260
+
261
def clear_history(request: gr.Request):
    """Start over with a fresh conversation.

    Returns:
        ``(state, chatbot, textbox)`` followed by five disabled buttons, where
        ``state`` is a new conversation and ``textbox`` is empty and
        interactive.
    """
    logger.info(f"clear_history. ip: {request.client.host}")
    fresh_state = init_state()
    empty_box = gr.MultimodalTextbox(value=None, interactive=True)
    return (
        fresh_state,
        fresh_state.to_gradio_chatbot(),
        empty_box,
        disable_btn,
        disable_btn,
        disable_btn,
        disable_btn,
        disable_btn,
    )
266
+
267
+
268
def change_system_prompt(state, system_prompt, request: gr.Request):
    """Store ``system_prompt`` on ``state`` and return the same state."""
    client_ip = request.client.host
    logger.info(f"Change system prompt. ip: {client_ip}")
    state.set_system_message(system_prompt)
    return state
272
+
273
+
274
def add_text(state, message, system_prompt, model_selector, request: gr.Request):
    """Append the user's message (text and images) to the conversation.

    Args:
        state: Current conversation; when falsy, a new one is created and the
            model list is refreshed.
        message: Dict from the multimodal textbox; ``"files"`` holds image
            paths and ``"text"`` the typed text.
        system_prompt: System prompt applied to the conversation.
        model_selector: Current model selection, passed through to the output.
        request: Incoming request, used to log the client address.

    Returns:
        A 9-tuple ``(state, chatbot, textbox, model_selector)`` plus five
        button updates, matching the outputs declared for the submit
        handlers. Every return path uses this shape.
    """
    print(f"state: {state}")
    if not state:
        state, model_selector = load_demo_refresh_model_list(request)
    images = message.get("files", [])
    text = message.get("text", "").strip()
    logger.info(f"add_text. ip: {request.client.host}. len: {len(text)}")
    textbox = gr.MultimodalTextbox(value=None, interactive=False)

    if len(text) <= 0 and len(images) == 0:
        # Nothing to send: tell http_bot to skip this turn.
        state.skip_next = True
        return (state, state.to_gradio_chatbot(), textbox, model_selector) + (
            no_change_btn,
        ) * 5

    if args.moderate:
        flagged = violates_moderation(text)
        if flagged:
            state.skip_next = True
            textbox = gr.MultimodalTextbox(
                value={"text": moderation_msg}, interactive=True
            )
            return (state, state.to_gradio_chatbot(), textbox, model_selector) + (
                no_change_btn,
            ) * 5

    images = [Image.open(path).convert("RGB") for path in images]

    # A new upload after earlier user images starts a new conversation.
    if len(images) > 0 and len(state.get_images(source=state.USER)) > 0:
        state = init_state(state)
    state.set_system_message(system_prompt)
    state.append_message(Conversation.USER, text, images)
    state.skip_next = False
    return (state, state.to_gradio_chatbot(), textbox, model_selector) + (
        disable_btn,
    ) * 5
304
+
305
+
306
def http_bot(
    state,
    model_selector,
    temperature,
    top_p,
    repetition_penalty,
    max_new_tokens,
    max_input_tiles,
    request: gr.Request,
):
    """Stream the selected model's reply into ``state`` and the chat UI.

    This is a generator. Every yielded value is an 8-tuple
    ``(state, chatbot, textbox)`` plus five button updates.

    Args:
        state: Conversation holding the pending user message.
        model_selector: Name of the model to query.
        temperature: Sampling temperature (sent as ``float``).
        top_p: Nucleus sampling value (sent as ``float``).
        repetition_penalty: Repetition penalty sent to the worker.
        max_new_tokens: Generation length limit sent to the worker.
        max_input_tiles: Tile limit sent to the worker.
        request: Incoming request, used for the client address.
    """
    logger.info(f"http_bot. ip: {request.client.host}")
    start_tstamp = time.time()
    model_name = model_selector
    # Buttons after a failure: the first three disabled, the last two enabled.
    failure_btns = (disable_btn,) * 3 + (enable_btn,) * 2

    if hasattr(state, "skip_next") and state.skip_next:
        # This generate call is skipped due to invalid inputs.
        yield (
            state,
            state.to_gradio_chatbot(),
            gr.MultimodalTextbox(interactive=False),
        ) + (no_change_btn,) * 5
        return

    # Query worker address
    controller_url = args.controller_url
    ret = requests.post(
        controller_url + "/get_worker_address", json={"model": model_name}
    )
    worker_addr = ret.json()["address"]
    if worker_addr.startswith("http://0.0.0.0") and args.worker_ip:
        worker_addr = worker_addr.replace("0.0.0.0", args.worker_ip)
    logger.info(f"model_name: {model_name}, worker_addr: {worker_addr}")

    # No available worker
    if worker_addr == "":
        state.update_message(Conversation.ASSISTANT, server_error_msg)
        yield (
            state,
            state.to_gradio_chatbot(),
            gr.MultimodalTextbox(interactive=False),
        ) + failure_btns
        return

    all_images = state.get_images(source=state.USER)
    all_image_paths = [state.save_image(image) for image in all_images]

    # Make requests
    pload = {
        "model": model_name,
        "prompt": state.get_prompt(),
        "temperature": float(temperature),
        "top_p": float(top_p),
        "max_new_tokens": max_new_tokens,
        "max_input_tiles": max_input_tiles,
        "repetition_penalty": repetition_penalty,
        "images": f"List of {len(all_images)} images: {all_image_paths}",
    }
    logger.info(f"==== request ====\n{pload}")
    # The image summary above is only for the log; it is not sent.
    pload.pop("images")
    # NOTE(review): ``inlude_image`` is the keyword used by the original call;
    # presumably it matches the Conversation API's spelling -- confirm.
    pload["prompt"] = state.get_prompt(inlude_image=True)
    state.append_message(Conversation.ASSISTANT, state.streaming_placeholder)
    yield (
        state,
        state.to_gradio_chatbot(),
        gr.MultimodalTextbox(interactive=False),
    ) + (disable_btn,) * 5

    # Defined up front so the final log line works even when the stream
    # delivers no text chunk.
    output = ""
    try:
        # Stream output
        response = requests.post(
            worker_addr + "/worker_generate_stream",
            headers=headers,
            json=pload,
            stream=True,
            timeout=20,
        )
        for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
            if not chunk:
                continue
            data = json.loads(chunk.decode())
            if data["error_code"] != 0:
                output = f"**{data['text']}**" + f" (error_code: {data['error_code']})"
                state.update_message(Conversation.ASSISTANT, output, None)
                yield (
                    state,
                    state.to_gradio_chatbot(),
                    gr.MultimodalTextbox(interactive=True),
                ) + failure_btns
                return

            if "text" in data:
                output = data["text"].strip()
                output += state.streaming_placeholder

            image = None
            if "image" in data:
                image = load_image_from_base64(data["image"])
                _ = state.save_image(image)

            state.update_message(Conversation.ASSISTANT, output, image)
            yield (
                state,
                state.to_gradio_chatbot(),
                gr.MultimodalTextbox(interactive=False),
            ) + (disable_btn,) * 5
    except requests.exceptions.RequestException:
        state.update_message(Conversation.ASSISTANT, server_error_msg, None)
        yield (
            state,
            state.to_gradio_chatbot(),
            gr.MultimodalTextbox(interactive=True),
        ) + failure_btns
        return

    # Post-process the finished reply: draw referenced boxes and/or fetch a
    # generated image, attaching the result to the assistant message.
    ai_response = state.return_last_message()
    if "<ref>" in ai_response:
        returned_image = find_bounding_boxes(state, ai_response)
        returned_image = [returned_image] if returned_image else []
        state.update_message(Conversation.ASSISTANT, ai_response, returned_image)
    if "```drawing-instruction" in ai_response:
        returned_image = query_image_generation(
            ai_response, sd_worker_url=sd_worker_url
        )
        returned_image = [returned_image] if returned_image else []
        state.update_message(Conversation.ASSISTANT, ai_response, returned_image)

    state.end_of_current_turn()

    yield (
        state,
        state.to_gradio_chatbot(),
        gr.MultimodalTextbox(interactive=True),
    ) + (enable_btn,) * 5

    finish_tstamp = time.time()
    logger.info(f"{output}")
    data = {
        "tstamp": round(finish_tstamp, 4),
        "like": None,
        "model": model_name,
        "start": round(start_tstamp, 4),
        # Previously this stored the start time again.
        "finish": round(finish_tstamp, 4),
        "state": state.dict(),
        "images": all_image_paths,
        "ip": request.client.host,
    }
    write2file(get_log_filename(), json.dumps(data) + "\n")
476
+
477
+
478
+ title_html = """
479
+ <h2> <span class="gradient-text" id="text">InternVL2</span><span class="plain-text">: Better than the Best—Expanding Performance Boundaries of Open-Source Multimodal Models with the Progressive Scaling Strategy</span></h2>
480
+ <a href="https://internvl.github.io/blog/2024-07-02-InternVL-2.0/">[📜 InternVL2 Blog]</a>
481
+ <a href="https://huggingface.co/spaces/OpenGVLab/InternVL">[🤗 HF Demo]</a>
482
+ <a href="https://github.com/OpenGVLab/InternVL?tab=readme-ov-file#quick-start-with-huggingface">[🚀 Quick Start]</a>
483
+ <a href="https://github.com/OpenGVLab/InternVL/blob/main/document/How_to_use_InternVL_API.md">[🌐 API]</a>
484
+ """
485
+
486
+ tos_markdown = """
487
+ ### Terms of use
488
+ By using this service, users are required to agree to the following terms:
489
+ The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
490
+ Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
491
+ For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
492
+ """
493
+
494
+
495
+ learn_more_markdown = """
496
+ ### License
497
+ The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
498
+
499
+ ### Acknowledgement
500
+ This demo is modified from LLaVA's demo. Thanks for their awesome work!
501
+ """
502
+ # .gradio-container {margin: 5px 10px 0 10px !important};
503
+ block_css = """
504
+ .gradio-container {margin: 0.1% 1% 0 1% !important; max-width: 98% !important;};
505
+ #buttons button {
506
+ min-width: min(120px,100%);
507
+ }
508
+
509
+ .gradient-text {
510
+ font-size: 28px;
511
+ width: auto;
512
+ font-weight: bold;
513
+ background: linear-gradient(45deg, red, orange, yellow, green, blue, indigo, violet);
514
+ background-clip: text;
515
+ -webkit-background-clip: text;
516
+ color: transparent;
517
+ }
518
+
519
+ .plain-text {
520
+ font-size: 22px;
521
+ width: auto;
522
+ font-weight: bold;
523
+ }
524
+ """
525
+
526
+ js = """
527
+ function createWaveAnimation() {
528
+ const text = document.getElementById('text');
529
+ var i = 0;
530
+ setInterval(function() {
531
+ const colors = [
532
+ 'red, orange, yellow, green, blue, indigo, violet, purple',
533
+ 'orange, yellow, green, blue, indigo, violet, purple, red',
534
+ 'yellow, green, blue, indigo, violet, purple, red, orange',
535
+ 'green, blue, indigo, violet, purple, red, orange, yellow',
536
+ 'blue, indigo, violet, purple, red, orange, yellow, green',
537
+ 'indigo, violet, purple, red, orange, yellow, green, blue',
538
+ 'violet, purple, red, orange, yellow, green, blue, indigo',
539
+ 'purple, red, orange, yellow, green, blue, indigo, violet',
540
+ ];
541
+ const angle = 45;
542
+ const colorIndex = i % colors.length;
543
+ text.style.background = `linear-gradient(${angle}deg, ${colors[colorIndex]})`;
544
+ text.style.webkitBackgroundClip = 'text';
545
+ text.style.backgroundClip = 'text';
546
+ text.style.color = 'transparent';
547
+ text.style.fontSize = '28px';
548
+ text.style.width = 'auto';
549
+ text.textContent = 'InternVL2';
550
+ text.style.fontWeight = 'bold';
551
+ i += 1;
552
+ }, 200);
553
+ const params = new URLSearchParams(window.location.search);
554
+ url_params = Object.fromEntries(params);
555
+ // console.log(url_params);
556
+ // console.log('hello world...');
557
+ // console.log(window.location.search);
558
+ // console.log('hello world...');
559
+ // alert(window.location.search)
560
+ // alert(url_params);
561
+ return url_params;
562
+ }
563
+
564
+ """
565
+
566
+
567
+ def build_demo(embed_mode):
568
+ textbox = gr.MultimodalTextbox(
569
+ interactive=True,
570
+ file_types=["image", "video"],
571
+ placeholder="Enter message or upload file...",
572
+ show_label=False,
573
+ )
574
+
575
+ with gr.Blocks(
576
+ title="InternVL-Chat",
577
+ theme=gr.themes.Default(),
578
+ css=block_css,
579
+ ) as demo:
580
+ state = gr.State()
581
+
582
+ if not embed_mode:
583
+ # gr.Markdown(title_markdown)
584
+ gr.HTML(title_html)
585
+
586
+ with gr.Row():
587
+ with gr.Column(scale=2):
588
+
589
+ with gr.Row(elem_id="model_selector_row"):
590
+ model_selector = gr.Dropdown(
591
+ choices=models,
592
+ value=models[0] if len(models) > 0 else "",
593
+ # value="InternVL-Chat-V1-5",
594
+ interactive=True,
595
+ show_label=False,
596
+ container=False,
597
+ )
598
+
599
+ with gr.Accordion("System Prompt", open=False) as system_prompt_row:
600
+ system_prompt = gr.Textbox(
601
+ value="请尽可能详细地回答用户的问题。",
602
+ label="System Prompt",
603
+ interactive=True,
604
+ )
605
+ with gr.Accordion("Parameters", open=False) as parameter_row:
606
+ temperature = gr.Slider(
607
+ minimum=0.0,
608
+ maximum=1.0,
609
+ value=0.2,
610
+ step=0.1,
611
+ interactive=True,
612
+ label="Temperature",
613
+ )
614
+ top_p = gr.Slider(
615
+ minimum=0.0,
616
+ maximum=1.0,
617
+ value=0.7,
618
+ step=0.1,
619
+ interactive=True,
620
+ label="Top P",
621
+ )
622
+ repetition_penalty = gr.Slider(
623
+ minimum=1.0,
624
+ maximum=1.5,
625
+ value=1.1,
626
+ step=0.02,
627
+ interactive=True,
628
+ label="Repetition penalty",
629
+ )
630
+ max_output_tokens = gr.Slider(
631
+ minimum=0,
632
+ maximum=4096,
633
+ value=1024,
634
+ step=64,
635
+ interactive=True,
636
+ label="Max output tokens",
637
+ )
638
+ max_input_tiles = gr.Slider(
639
+ minimum=1,
640
+ maximum=32,
641
+ value=12,
642
+ step=1,
643
+ interactive=True,
644
+ label="Max input tiles (control the image size)",
645
+ )
646
+ examples = gr.Examples(
647
+ examples=[
648
+ [
649
+ {
650
+ "files": [
651
+ "gallery/prod_9.jpg",
652
+ ],
653
+ "text": "What's at the far end of the image?",
654
+ }
655
+ ],
656
+ [
657
+ {
658
+ "files": [
659
+ "gallery/astro_on_unicorn.png",
660
+ ],
661
+ "text": "What does this image mean?",
662
+ }
663
+ ],
664
+ [
665
+ {
666
+ "files": [
667
+ "gallery/prod_12.png",
668
+ ],
669
+ "text": "What are the consequences of the easy decisions shown in this image?",
670
+ }
671
+ ],
672
+ [
673
+ {
674
+ "files": [
675
+ "gallery/child_1.jpg",
676
+ "gallery/child_2.jpg",
677
+ f"gallery/child_3.jpg",
678
+ ],
679
+ "text": "这三帧图片讲述了一件什么事情?",
680
+ }
681
+ ],
682
+ ],
683
+ inputs=[textbox],
684
+ )
685
+
686
+ with gr.Column(scale=8):
687
+ chatbot = gr.Chatbot(
688
+ elem_id="chatbot",
689
+ label="InternVL2",
690
+ height=580,
691
+ show_copy_button=True,
692
+ show_share_button=True,
693
+ avatar_images=[
694
+ "assets/human.png",
695
+ "assets/assistant.png",
696
+ ],
697
+ bubble_full_width=False,
698
+ )
699
+ with gr.Row():
700
+ with gr.Column(scale=8):
701
+ textbox.render()
702
+ with gr.Column(scale=1, min_width=50):
703
+ submit_btn = gr.Button(value="Send", variant="primary")
704
+ with gr.Row(elem_id="buttons") as button_row:
705
+ upvote_btn = gr.Button(value="👍 Upvote", interactive=False)
706
+ downvote_btn = gr.Button(value="👎 Downvote", interactive=False)
707
+ flag_btn = gr.Button(value="⚠️ Flag", interactive=False)
708
+ # stop_btn = gr.Button(value="⏹️ Stop Generation", interactive=False)
709
+ regenerate_btn = gr.Button(
710
+ value="🔄 Regenerate", interactive=False
711
+ )
712
+ clear_btn = gr.Button(value="🗑️ Clear", interactive=False)
713
+
714
+ if not embed_mode:
715
+ gr.Markdown(tos_markdown)
716
+ gr.Markdown(learn_more_markdown)
717
+ url_params = gr.JSON(visible=False)
718
+
719
+ # Register listeners
720
+ btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
721
+ upvote_btn.click(
722
+ upvote_last_response,
723
+ [state, model_selector],
724
+ [textbox, upvote_btn, downvote_btn, flag_btn],
725
+ )
726
+ downvote_btn.click(
727
+ downvote_last_response,
728
+ [state, model_selector],
729
+ [textbox, upvote_btn, downvote_btn, flag_btn],
730
+ )
731
+ chatbot.like(
732
+ vote_selected_response,
733
+ [state, model_selector],
734
+ [],
735
+ )
736
+ flag_btn.click(
737
+ flag_last_response,
738
+ [state, model_selector],
739
+ [textbox, upvote_btn, downvote_btn, flag_btn],
740
+ )
741
+ regenerate_btn.click(
742
+ regenerate,
743
+ [state, system_prompt],
744
+ [state, chatbot, textbox] + btn_list,
745
+ ).then(
746
+ http_bot,
747
+ [
748
+ state,
749
+ model_selector,
750
+ temperature,
751
+ top_p,
752
+ repetition_penalty,
753
+ max_output_tokens,
754
+ max_input_tiles,
755
+ # bbox_threshold,
756
+ # mask_threshold,
757
+ ],
758
+ [state, chatbot, textbox] + btn_list,
759
+ )
760
+ clear_btn.click(clear_history, None, [state, chatbot, textbox] + btn_list)
761
+
762
+ textbox.submit(
763
+ add_text,
764
+ [state, textbox, system_prompt, model_selector],
765
+ [state, chatbot, textbox, model_selector] + btn_list,
766
+ ).then(
767
+ http_bot,
768
+ [
769
+ state,
770
+ model_selector,
771
+ temperature,
772
+ top_p,
773
+ repetition_penalty,
774
+ max_output_tokens,
775
+ max_input_tiles,
776
+ # bbox_threshold,
777
+ # mask_threshold,
778
+ ],
779
+ [state, chatbot, textbox] + btn_list,
780
+ )
781
+ submit_btn.click(
782
+ add_text,
783
+ [state, textbox, system_prompt, model_selector],
784
+ [state, chatbot, textbox, model_selector] + btn_list,
785
+ ).then(
786
+ http_bot,
787
+ [
788
+ state,
789
+ model_selector,
790
+ temperature,
791
+ top_p,
792
+ repetition_penalty,
793
+ max_output_tokens,
794
+ max_input_tiles,
795
+ # bbox_threshold,
796
+ # mask_threshold,
797
+ ],
798
+ [state, chatbot, textbox] + btn_list,
799
+ )
800
+
801
+ # NOTE: The following code will be not triggered when deployed on HF space.
802
+ # It's very strange. I don't know why.
803
+ """
804
+ if args.model_list_mode == "once":
805
+ demo.load(
806
+ load_demo,
807
+ [url_params],
808
+ [state, model_selector],
809
+ js=js,
810
+ )
811
+ elif args.model_list_mode == "reload":
812
+ demo.load(
813
+ load_demo_refresh_model_list,
814
+ None,
815
+ [state, model_selector],
816
+ js=js,
817
+ )
818
+ else:
819
+ raise ValueError(f"Unknown model list mode: {args.model_list_mode}")
820
+ """
821
+
822
+ return demo
823
 
824
 
825
if __name__ == "__main__":
    # Parse the command line; the controller URL and worker IP fall back to
    # the CONTROLLER_URL / WORKER_IP environment variables.
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=7860)
    parser.add_argument("--controller-url", type=str, default=None)
    parser.add_argument("--worker-ip", type=str, default=None)
    parser.add_argument("--concurrency-count", type=int, default=10)
    parser.add_argument(
        "--model-list-mode", type=str, default="reload", choices=["once", "reload"]
    )
    parser.add_argument("--sd-worker-url", type=str, default=None)
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--moderate", action="store_true")
    parser.add_argument("--embed", action="store_true")
    args = parser.parse_args()
    logger.info(f"args: {args}")

    args.controller_url = args.controller_url or os.environ.get("CONTROLLER_URL", None)
    if not args.controller_url:
        raise ValueError("controller-url is required.")

    args.worker_ip = args.worker_ip or os.environ.get("WORKER_IP", None)

    # Module-level names read by the handlers defined above.
    models = get_model_list()
    sd_worker_url = args.sd_worker_url

    logger.info(args)
    demo = build_demo(args.embed)
    demo.queue(api_open=False).launch(
        server_name=args.host,
        server_port=args.port,
        share=args.share,
        max_threads=args.concurrency_count,
    )
model_worker.py CHANGED
@@ -20,7 +20,6 @@ import uuid
20
  import traceback
21
  from functools import partial
22
  from threading import Thread
23
- import multiprocessing as mp
24
 
25
  import requests
26
  import torch
@@ -39,7 +38,6 @@ from utils import (
39
  load_image_from_base64,
40
  )
41
 
42
- mp.set_start_method("fork", force=True)
43
 
44
  worker_id = str(uuid.uuid4())[:6]
45
  logger = build_logger("model_worker", f"model_worker_{worker_id}.log")
@@ -160,7 +158,6 @@ def split_model(model_name):
160
  return device_map
161
 
162
 
163
- @spaces.GPU(duration=120)
164
  def multi_thread_infer(
165
  model, tokenizer, pixel_values, question, history, generation_config
166
  ):
 
20
  import traceback
21
  from functools import partial
22
  from threading import Thread
 
23
 
24
  import requests
25
  import torch
 
38
  load_image_from_base64,
39
  )
40
 
 
41
 
42
  worker_id = str(uuid.uuid4())[:6]
43
  logger = build_logger("model_worker", f"model_worker_{worker_id}.log")
 
158
  return device_map
159
 
160
 
 
161
  def multi_thread_infer(
162
  model, tokenizer, pixel_values, question, history, generation_config
163
  ):