Spaces: Running on Zero

chenlin committed • 8ce49c8
Parent(s): 03aa805

support zero gpu

Files changed:
- README.md +1 -1
- app.py +107 -7
- requirements.txt +14 -1
- templates/index.html +0 -33
README.md CHANGED
@@ -11,6 +11,6 @@ license: apache-2.0
 ---
 
 **Paper or resources for more information:**
-[[Project](https://ShareGPT4V.github.io/)] [[Paper](https://huggingface.co/papers/2311.12793)] [[Code](https://github.com/
+[[Project](https://ShareGPT4V.github.io/)] [[Paper](https://huggingface.co/papers/2311.12793)] [[Code](https://github.com/ShareGPT4Omni/ShareGPT4V)]
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,12 +1,112 @@
+import gradio as gr
+import torch
+import spaces
+from PIL import Image
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
+model_name = "Lin-Chen/ShareCaptioner"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name, device_map="cuda", trust_remote_code=True).eval()
+model.tokenizer = tokenizer
 
+model.cuda()
+model.half()
 
+seg1 = '<|User|>:'
+seg2 = f'Analyze the image in a comprehensive and detailed manner.{model.eoh}\n<|Bot|>:'
+seg_emb1 = model.encode_text(seg1, add_special_tokens=True)
+seg_emb2 = model.encode_text(seg2, add_special_tokens=False)
 
 
+@spaces.GPU(duration=60)
+def detailed_caption(img_path):
+    subs = []
+    image = Image.open(img_path).convert("RGB")
+    subs.append(model.vis_processor(image).unsqueeze(0))
+
+    subs = torch.cat(subs, dim=0).cuda()
+    tmp_bs = subs.shape[0]
+    tmp_seg_emb1 = seg_emb1.repeat(tmp_bs, 1, 1)
+    tmp_seg_emb2 = seg_emb2.repeat(tmp_bs, 1, 1)
+    with torch.cuda.amp.autocast():
+        with torch.no_grad():
+            subs = model.encode_img(subs)
+            input_emb = torch.cat([tmp_seg_emb1, subs, tmp_seg_emb2], dim=1)
+            out_embeds = model.internlm_model.generate(inputs_embeds=input_emb,
+                                                       max_length=500,
+                                                       num_beams=3,
+                                                       min_length=1,
+                                                       do_sample=True,
+                                                       repetition_penalty=1.5,
+                                                       length_penalty=1.0,
+                                                       temperature=1.,
+                                                       eos_token_id=model.tokenizer.eos_token_id,
+                                                       num_return_sequences=1,
+                                                       )
+
+    return model.decode_text([out_embeds[0]])
+
+
+block_css = """
+#buttons button {
+    min-width: min(120px,100%);
+}
+"""
+title_markdown = ("""
+# 💬 ShareGPT4V: Improving Large Multi-modal Models with Better Captions
+[[Project Page](https://sharegpt4v.github.io/)] [[Code](https://github.com/ShareGPT4Omni/ShareGPT4V)] | [[Paper](https://github.com/InternLM/InternLM-XComposer/blob/main/projects/ShareGPT4V/ShareGPT4V.pdf)]
+""")
+tos_markdown = ("""
+### Terms of use
+By using this service, users are required to agree to the following terms:
+The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
+For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
+""")
+learn_more_markdown = ("""
+### License
+The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
+""")
+ack_markdown = ("""
+### Acknowledgement
+The template for this web demo is from [LLaVA](https://github.com/haotian-liu/LLaVA), and we are very grateful to LLaVA for their open source contributions to the community!
+""")
+
+
+def build_demo():
+    with gr.Blocks(title="Share-Captioner", theme=gr.themes.Default(), css=block_css) as demo:
+        gr.Markdown(title_markdown)
+
+        with gr.Row():
+            with gr.Column(scale=5):
+                with gr.Row(elem_id="Model ID"):
+                    gr.Dropdown(
+                        choices=['Share-Captioner'],
+                        value='Share-Captioner',
+                        interactive=True,
+                        label='Model ID',
+                        container=False)
+                img_path = gr.Image(label="Image", type="filepath")
+            with gr.Column(scale=8):
+                with gr.Row():
+                    caption = gr.Textbox(label='Caption')
+                with gr.Row():
+                    submit_btn = gr.Button(
+                        value="🚀 Generate", variant="primary")
+                    regenerate_btn = gr.Button(value="🔄 Regenerate")
+
+        gr.Markdown(tos_markdown)
+        gr.Markdown(learn_more_markdown)
+        gr.Markdown(ack_markdown)
+
+        submit_btn.click(detailed_caption, inputs=[
+                         img_path], outputs=[caption])
+        regenerate_btn.click(detailed_caption, inputs=[
+                             img_path], outputs=[caption])
+
+    return demo
+
+
+if __name__ == '__main__':
+    demo = build_demo()
+    demo.launch()
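The substance of this commit is the ZeroGPU pattern: import the `spaces` package early (before any CUDA work) and wrap the GPU-bound entry point in `@spaces.GPU`, so the Space is granted a GPU only while a decorated call is running rather than holding one permanently. A minimal sketch of the pattern in isolation (the function and tensor here are illustrative, not part of the Space):

```python
import spaces  # ZeroGPU runtime; import before CUDA is touched
import torch


@spaces.GPU(duration=60)  # a GPU is attached for at most ~60 s per call
def run_on_gpu(x: torch.Tensor) -> torch.Tensor:
    # Inside a decorated function, CUDA is available even on ZeroGPU hardware.
    return (x.cuda() * 2).cpu()
```

`detailed_caption` above follows this shape: the model and prompt embeddings are prepared at import time, while the CUDA execution (`subs.cuda()`, `encode_img`, `generate`) happens inside the decorated function, and `duration=60` bounds how long a single captioning call may hold the device.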
requirements.txt CHANGED
@@ -1 +1,14 @@
+transformers==4.32.0
+accelerate==0.24.0
+tiktoken==0.5.1
+einops==0.7.0
+transformers_stream_generator==0.0.4
+scipy==1.11.3
+torchvision==0.15.2
+pillow==10.0.1
+matplotlib==3.8.0
+gradio==3.50.2
+sentencepiece
+urllib3==1.26.18
+timm==0.6.13
+spaces
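Everything is pinned except `sentencepiece` and `spaces`; Gradio stays on the last 3.x release (3.50.2) and transformers on 4.32.0, presumably to match ShareCaptioner's `trust_remote_code` implementation. A quick sanity check, not part of the commit, that the pins actually resolved in the running environment:

```python
# Verify a few of the pinned requirements at runtime (a sketch, not part of this commit).
from importlib.metadata import version

pins = {"transformers": "4.32.0", "gradio": "3.50.2", "accelerate": "0.24.0"}
for pkg, want in pins.items():
    got = version(pkg)
    if got != want:
        print(f"warning: {pkg} resolved to {got}, expected {want}")
```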
templates/index.html DELETED
@@ -1,33 +0,0 @@
-<!DOCTYPE html>
-<html>
-
-<head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width" />
-    <title>My static Space</title>
-    <style>
-        body {
-            margin: 0;
-        }
-        .space {
-            max-width: 100%;
-            max-height: 100%;
-            width: 100vw;
-            height: 100vh;
-            overflow: hidden;
-        }
-        .iframe {
-            min-width: 100%;
-            min-height: 100%;
-        }
-    </style>
-</head>
-
-<body>
-    <div class="space">
-        <iframe class="iframe" allowfullscreen="true" frameborder="0" src="https://g-app-center-034633-9425-bvmokjk.openxlab.space">
-        </iframe>
-    </div>
-</body>
-
-</html>