chenlin committed on
Commit
8ce49c8
•
1 Parent(s): 03aa805

support zero gpu

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. app.py +107 -7
  3. requirements.txt +14 -1
  4. templates/index.html +0 -33
README.md CHANGED
@@ -11,6 +11,6 @@ license: apache-2.0
11
  ---
12
 
13
  **Paper or resources for more information:**
14
- [[Project](https://ShareGPT4V.github.io/)] [[Paper](https://huggingface.co/papers/2311.12793)] [[Code](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V)]
15
 
16
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
11
  ---
12
 
13
  **Paper or resources for more information:**
14
+ [[Project](https://ShareGPT4V.github.io/)] [[Paper](https://huggingface.co/papers/2311.12793)] [[Code](https://github.com/ShareGPT4Omni/ShareGPT4V)]
15
 
16
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,12 +1,112 @@
1
- from flask import Flask, render_template
 
 
 
 
2
 
3
- app = Flask(__name__)
 
 
 
 
4
 
 
 
5
 
6
- @app.route("/")
7
- def index():
8
- return render_template("index.html")
 
9
 
10
 
11
- if __name__ == "__main__":
12
- app.run(debug=False, port=7860, host="0.0.0.0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import spaces
4
+ from PIL import Image
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer
6
 
7
+ model_name = "Lin-Chen/ShareCaptioner"
8
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
9
+ model = AutoModelForCausalLM.from_pretrained(
10
+ model_name, device_map="cuda", trust_remote_code=True).eval()
11
+ model.tokenizer = tokenizer
12
 
13
+ model.cuda()
14
+ model.half()
15
 
16
+ seg1 = '<|User|>:'
17
+ seg2 = f'Analyze the image in a comprehensive and detailed manner.{model.eoh}\n<|Bot|>:'
18
+ seg_emb1 = model.encode_text(seg1, add_special_tokens=True)
19
+ seg_emb2 = model.encode_text(seg2, add_special_tokens=False)
20
 
21
 
22
+ @spaces.GPU(duration=60)
23
+ def detailed_caption(img_path):
24
+ subs = []
25
+ image = Image.open(img_path).convert("RGB")
26
+ subs.append(model.vis_processor(image).unsqueeze(0))
27
+
28
+ subs = torch.cat(subs, dim=0).cuda()
29
+ tmp_bs = subs.shape[0]
30
+ tmp_seg_emb1 = seg_emb1.repeat(tmp_bs, 1, 1)
31
+ tmp_seg_emb2 = seg_emb2.repeat(tmp_bs, 1, 1)
32
+ with torch.cuda.amp.autocast():
33
+ with torch.no_grad():
34
+ subs = model.encode_img(subs)
35
+ input_emb = torch.cat([tmp_seg_emb1, subs, tmp_seg_emb2], dim=1)
36
+ out_embeds = model.internlm_model.generate(inputs_embeds=input_emb,
37
+ max_length=500,
38
+ num_beams=3,
39
+ min_length=1,
40
+ do_sample=True,
41
+ repetition_penalty=1.5,
42
+ length_penalty=1.0,
43
+ temperature=1.,
44
+ eos_token_id=model.tokenizer.eos_token_id,
45
+ num_return_sequences=1,
46
+ )
47
+
48
+ return model.decode_text([out_embeds[0]])
49
+
50
+
51
+ block_css = """
52
+ #buttons button {
53
+ min-width: min(120px,100%);
54
+ }
55
+ """
56
+ title_markdown = ("""
57
+ # 🐬 ShareGPT4V: Improving Large Multi-modal Models with Better Captions
58
+ [[Project Page](https://sharegpt4v.github.io/)] [[Code](https://github.com/ShareGPT4Omni/ShareGPT4V)] | [[Paper](https://github.com/InternLM/InternLM-XComposer/blob/main/projects/ShareGPT4V/ShareGPT4V.pdf)]
59
+ """)
60
+ tos_markdown = ("""
61
+ ### Terms of use
62
+ By using this service, users are required to agree to the following terms:
63
+ The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
64
+ For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
65
+ """)
66
+ learn_more_markdown = ("""
67
+ ### License
68
+ The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
69
+ """)
70
+ ack_markdown = ("""
71
+ ### Acknowledgement
72
+ The template for this web demo is from [LLaVA](https://github.com/haotian-liu/LLaVA), and we are very grateful to LLaVA for their open source contributions to the community!
73
+ """)
74
+
75
+
76
+ def build_demo():
77
+ with gr.Blocks(title="Share-Captioner", theme=gr.themes.Default(), css=block_css) as demo:
78
+ gr.Markdown(title_markdown)
79
+
80
+ with gr.Row():
81
+ with gr.Column(scale=5):
82
+ with gr.Row(elem_id="Model ID"):
83
+ gr.Dropdown(
84
+ choices=['Share-Captioner'],
85
+ value='Share-Captioner',
86
+ interactive=True,
87
+ label='Model ID',
88
+ container=False)
89
+ img_path = gr.Image(label="Image", type="filepath")
90
+ with gr.Column(scale=8):
91
+ with gr.Row():
92
+ caption = gr.Textbox(label='Caption')
93
+ with gr.Row():
94
+ submit_btn = gr.Button(
95
+ value="🚀 Generate", variant="primary")
96
+ regenerate_btn = gr.Button(value="🔄 Regenerate")
97
+
98
+ gr.Markdown(tos_markdown)
99
+ gr.Markdown(learn_more_markdown)
100
+ gr.Markdown(ack_markdown)
101
+
102
+ submit_btn.click(detailed_caption, inputs=[
103
+ img_path], outputs=[caption])
104
+ regenerate_btn.click(detailed_caption, inputs=[
105
+ img_path], outputs=[caption])
106
+
107
+ return demo
108
+
109
+
110
+ if __name__ == '__main__':
111
+ demo = build_demo()
112
+ demo.launch()
requirements.txt CHANGED
@@ -1 +1,14 @@
1
- flask
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers==4.32.0
2
+ accelerate==0.24.0
3
+ tiktoken==0.5.1
4
+ einops==0.7.0
5
+ transformers_stream_generator==0.0.4
6
+ scipy==1.11.3
7
+ torchvision==0.15.2
8
+ pillow==10.0.1
9
+ matplotlib==3.8.0
10
+ gradio==3.50.2
11
+ sentencepiece
12
+ urllib3==1.26.18
13
+ timm==0.6.13
14
+ spaces
templates/index.html DELETED
@@ -1,33 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
-
4
- <head>
5
- <meta charset="utf-8" />
6
- <meta name="viewport" content="width=device-width" />
7
- <title>My static Space</title>
8
- <style>
9
- body {
10
- margin: 0;
11
- }
12
- .space {
13
- max-width: 100%;
14
- max-height: 100%;
15
- width: 100vw;
16
- height: 100vh;
17
- overflow: hidden;
18
- }
19
- .iframe {
20
- min-width: 100%;
21
- min-height: 100%;
22
- }
23
- </style>
24
- </head>
25
-
26
- <body>
27
- <div class="space">
28
- <iframe class="iframe" allowfullscreen="true" frameborder="0" src="https://g-app-center-034633-9425-bvmokjk.openxlab.space">
29
- </iframe>
30
- </div>
31
- </body>
32
-
33
- </html>