Spaces:

Lin-Chen
/

Share-Captioner

Running on Zero

App Files Community

chenlin committed on Jun 18

Commit

8ce49c8

•

1 Parent(s): 03aa805

support zero gpu

Browse files

Files changed (4) hide show

README.md +1 -1
app.py +107 -7
requirements.txt +14 -1
templates/index.html +0 -33

README.md CHANGED Viewed

@@ -11,6 +11,6 @@ license: apache-2.0
 ---
 **Paper or resources for more information:**
-[[Project](https://ShareGPT4V.github.io/)] [[Paper](https://huggingface.co/papers/2311.12793)] [[Code](https://github.com/InternLM/InternLM-XComposer/tree/main/projects/ShareGPT4V)]
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 **Paper or resources for more information:**
+[[Project](https://ShareGPT4V.github.io/)] [[Paper](https://huggingface.co/papers/2311.12793)] [[Code](https://github.com/ShareGPT4Omni/ShareGPT4V)]
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,12 +1,112 @@
-from flask import Flask, render_template
-app = Flask(__name__)
-@app.route("/")
-def index():
-    return render_template("index.html")
-if __name__ == "__main__":
-    app.run(debug=False, port=7860, host="0.0.0.0")

+import gradio as gr
+import torch
+import spaces
+from PIL import Image
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# Model identifier handed to both from_pretrained() calls below.
+model_name = "Lin-Chen/ShareCaptioner"
+# trust_remote_code=True lets transformers execute the model repo's own code.
+# NOTE(review): the helpers used later (encode_text, encode_img, vis_processor,
+# decode_text, eoh) presumably come from that remote code -- confirm.
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name, device_map="cuda", trust_remote_code=True).eval()
+# detailed_caption() reads the EOS token id through model.tokenizer.
+model.tokenizer = tokenizer
+# Move the weights to the GPU and convert them to half precision.
+model.cuda()
+model.half()
+# Prompt text placed before (seg1) and after (seg2) the image embeddings.
+seg1 = '<|User|>:'
+seg2 = f'Analyze the image in a comprehensive and detailed manner.{model.eoh}\n<|Bot|>:'
+# Both prompt segments are encoded once at import time and reused per request.
+seg_emb1 = model.encode_text(seg1, add_special_tokens=True)
+seg_emb2 = model.encode_text(seg2, add_special_tokens=False)
+@spaces.GPU(duration=60)
+def detailed_caption(img_path):
+    """Generate a detailed caption for the image stored at ``img_path``.
+
+    Args:
+        img_path: Filesystem path of the image to caption (the Gradio image
+            input is declared with ``type="filepath"``). ``None`` or an empty
+            value means no image was provided.
+
+    Returns:
+        The value of ``model.decode_text`` for the first generated sequence.
+
+    Raises:
+        gr.Error: If no image was provided, so the UI reports a readable
+            message instead of an opaque failure inside ``Image.open``.
+    """
+    if not img_path:
+        raise gr.Error("Please upload an image before generating a caption.")
+    # Release the file handle as soon as the pixels are converted to RGB.
+    with Image.open(img_path) as img_file:
+        image = img_file.convert("RGB")
+    # Single-image batch: add the leading batch axis and move it to the GPU.
+    pixels = model.vis_processor(image).unsqueeze(0).cuda()
+    batch_size = pixels.shape[0]
+    # Expand the pre-encoded prompt segments to the batch size.
+    prefix_emb = seg_emb1.repeat(batch_size, 1, 1)
+    suffix_emb = seg_emb2.repeat(batch_size, 1, 1)
+    with torch.cuda.amp.autocast(), torch.no_grad():
+        img_emb = model.encode_img(pixels)
+        # Prompt layout: user prefix, image embeddings, instruction + bot tag.
+        input_emb = torch.cat([prefix_emb, img_emb, suffix_emb], dim=1)
+        out_embeds = model.internlm_model.generate(
+            inputs_embeds=input_emb,
+            max_length=500,
+            num_beams=3,
+            min_length=1,
+            do_sample=True,
+            repetition_penalty=1.5,
+            length_penalty=1.0,
+            temperature=1.,
+            eos_token_id=model.tokenizer.eos_token_id,
+            num_return_sequences=1,
+        )
+    # Only one sequence is requested, so decode the first result.
+    return model.decode_text([out_embeds[0]])
+# Extra CSS handed to gr.Blocks(css=...) in build_demo().
+block_css = """
+#buttons button {
+    min-width: min(120px,100%);
+}
+"""
+# Page header: project title and resource links, rendered first in the UI.
+title_markdown = ("""
+# 🐬 ShareGPT4V: Improving Large Multi-modal Models with Better Captions
+[[Project Page](https://sharegpt4v.github.io/)] [[Code](https://github.com/ShareGPT4Omni/ShareGPT4V)] | [[Paper](https://github.com/InternLM/InternLM-XComposer/blob/main/projects/ShareGPT4V/ShareGPT4V.pdf)]
+""")
+# Terms-of-use notice rendered below the main controls.
+tos_markdown = ("""
+### Terms of use
+By using this service, users are required to agree to the following terms:
+The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
+For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
+""")
+# License notice rendered after the terms of use.
+learn_more_markdown = ("""
+### License
+The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
+""")
+# Acknowledgement rendered last on the page.
+ack_markdown = ("""
+### Acknowledgement
+The template for this web demo is from [LLaVA](https://github.com/haotian-liu/LLaVA), and we are very grateful to LLaVA for their open source contributions to the community!
+""")
+def build_demo():
+    """Assemble the Share-Captioner Gradio interface.
+
+    Returns:
+        The ``gr.Blocks`` app; the caller is responsible for launching it.
+    """
+    demo = gr.Blocks(
+        title="Share-Captioner", theme=gr.themes.Default(), css=block_css)
+    with demo:
+        gr.Markdown(title_markdown)
+        with gr.Row():
+            # Left column: model selector above the image upload.
+            with gr.Column(scale=5):
+                with gr.Row(elem_id="Model ID"):
+                    gr.Dropdown(
+                        label='Model ID',
+                        choices=['Share-Captioner'],
+                        value='Share-Captioner',
+                        interactive=True,
+                        container=False,
+                    )
+                image_input = gr.Image(label="Image", type="filepath")
+            # Right column: caption output above the two action buttons.
+            with gr.Column(scale=8):
+                with gr.Row():
+                    caption_box = gr.Textbox(label='Caption')
+                with gr.Row():
+                    generate_btn = gr.Button(
+                        value="🚀 Generate", variant="primary")
+                    retry_btn = gr.Button(value="🔄 Regenerate")
+        # Notices are rendered in this order below the main controls.
+        for notice in (tos_markdown, learn_more_markdown, ack_markdown):
+            gr.Markdown(notice)
+        # Both buttons run the same captioning call on the current image.
+        for button in (generate_btn, retry_btn):
+            button.click(
+                detailed_caption, inputs=[image_input], outputs=[caption_box])
+    return demo
+# Build and serve the UI only when this file is executed as a script.
+if __name__ == '__main__':
+    demo = build_demo()
+    demo.launch()

requirements.txt CHANGED Viewed

@@ -1 +1,14 @@
-flask

+transformers==4.32.0
+accelerate==0.24.0
+tiktoken==0.5.1
+einops==0.7.0
+transformers_stream_generator==0.0.4
+scipy==1.11.3
+torchvision==0.15.2
+pillow==10.0.1
+matplotlib==3.8.0
+gradio==3.50.2
+sentencepiece
+urllib3==1.26.18
+timm==0.6.13
+spaces

templates/index.html DELETED Viewed

@@ -1,33 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width" />
-    <title>My static Space</title>
-    <style>
-        body {
-            margin: 0;
-        }
-        .space {
-            max-width: 100%;
-            max-height: 100%;
-            width: 100vw;
-            height: 100vh;
-            overflow: hidden;
-        }
-        .iframe {
-            min-width: 100%;
-            min-height: 100%;
-        }
-    </style>
-</head>
-<body>
-    <div class="space">
-        <iframe class="iframe" allowfullscreen="true" frameborder="0" src="https://g-app-center-034633-9425-bvmokjk.openxlab.space">
-        </iframe>
-    </div>
-</body>
-</html>