tanhuajie2001 committed
Commit 064fcb5 · Parent(s): 2703754
Update README.md

README.md CHANGED
@@ -13,6 +13,45 @@ base_model:
 [[Paper]](https://arxiv.org/abs/2407.17331) [[GitHub]](https://github.com/deepglint/unicom)
 
+## Embodied Ability Evaluation: Performance on RoboVQA and OpenEQA
+
+| Benchmark | Metric | MLCD<br>Embodied-7B | LLaVA<br>OneVision-7B | GPT-4V | RoboMamba |
+| :-- | :-- | :-: | :-: | :-: | :-: |
+| RoboVQA | BLEU1 | <span style="color:red">73.16</span> | 38.12 | - | 54.9 |
+| | BLEU2 | <span style="color:red">66.39</span> | 33.56 | - | 44.2 |
+| | BLEU3 | <span style="color:red">60.61</span> | 31.76 | - | 39.5 |
+| | BLEU4 | <span style="color:red">56.56</span> | 30.97 | - | 36.3 |
+| OpenEQA | Object State Recognition | <span style="color:red">71.83</span> | - | 63.2 | - |
+| | Object Recognition | <span style="color:red">49.46</span> | - | 43.4 | - |
+| | Functional Reasoning | 54.38 | - | <span style="color:red">57.4</span> | - |
+| | Spatial Understanding | <span style="color:red">48.64</span> | - | 33.6 | - |
+| | Attribute Recognition | <span style="color:red">67.08</span> | - | 57.2 | - |
+| | World Knowledge | <span style="color:red">53.87</span> | - | 50.7 | - |
+| | Object Localization | <span style="color:red">43.06</span> | - | 42.0 | - |
+
+## General Ability Evaluation: Comparison with LLaVA OneVision-7B and GPT-4
+
+| Dataset | Split | MLCD<br>Embodied-7B | LLaVA<br>OneVision-7B | GPT-4V | GPT-4o |
+| :-- | :-: | :-: | :-: | :-: | :-: |
+| AI2D | test | 79.9 | 81.4 | 78.2 | 94.2 |
+| ChartQA | test | 83.0 | 80.0 | 78.5 | 85.7 |
+| DocVQA | test | 91.6 | 87.5 | 88.4 | 92.8 |
+| InfoVQA | val | 73.9 | 70.7 | - | - |
+| InfoVQA | test | 70.0 | 68.8 | - | - |
+| MMMU | val | 47.3 | 48.8 | 56.8 | 69.1 |
+| MMStar | test | 58.5 | 61.7 | 57.1 | 63.9 |
+| OCRBench | - | 749.0 | 697.0 | 656.0 | 805.0 |
+| RealWorldQA | test | 68.9 | 66.3 | 61.4 | 58.6 |
+| SeedBench | image | 74.9 | 75.4 | 49.9 | 76.2 |
+| MMBench | en-dev | 81.1 | 83.2 | 81.3 | 83.4 |
+| MMBench | en-test | 80.1 | 80.8 | 75.0 | - |
+| MME | test | 578/1603 | 418/1580 | 517/1409 | - |
+
 ## Usage
 
 ### A. Installation
@@ -96,45 +135,4 @@ pip install lmms-eval==0.2.0
 bash eval.sh
 ```
 
-## Embodied Ability Evaluation: Performance on RoboVQA and OpenEQA
-
-| Benchmark | Metric | MLCD<br>Embodied-7B | LLaVA<br>OneVision-7B | GPT-4V | RoboMamba |
-| :-- | :-- | :-: | :-: | :-: | :-: |
-| RoboVQA | BLEU1 | <span style="color:red">73.16</span> | 38.12 | - | 54.9 |
-| | BLEU2 | <span style="color:red">66.39</span> | 33.56 | - | 44.2 |
-| | BLEU3 | <span style="color:red">60.61</span> | 31.76 | - | 39.5 |
-| | BLEU4 | <span style="color:red">56.56</span> | 30.97 | - | 36.3 |
-| OpenEQA | Object State Recognition | <span style="color:red">71.83</span> | - | 63.2 | - |
-| | Object Recognition | <span style="color:red">49.46</span> | - | 43.4 | - |
-| | Functional Reasoning | 54.38 | - | <span style="color:red">57.4</span> | - |
-| | Spatial Understanding | <span style="color:red">48.64</span> | - | 33.6 | - |
-| | Attribute Recognition | <span style="color:red">67.08</span> | - | 57.2 | - |
-| | World Knowledge | <span style="color:red">53.87</span> | - | 50.7 | - |
-| | Object Localization | <span style="color:red">43.06</span> | - | 42.0 | - |
-
-## General Ability Evaluation: Comparison with LLaVA OneVision-7B and GPT-4
-
-| Dataset | Split | MLCD<br>Embodied-7B | LLaVA<br>OneVision-7B | GPT-4V | GPT-4o |
-| :-- | :-: | :-: | :-: | :-: | :-: |
-| AI2D | test | 79.9 | 81.4 | 78.2 | 94.2 |
-| ChartQA | test | 83.0 | 80.0 | 78.5 | 85.7 |
-| DocVQA | test | 91.6 | 87.5 | 88.4 | 92.8 |
-| InfoVQA | val | 73.9 | 70.7 | - | - |
-| InfoVQA | test | 70.0 | 68.8 | - | - |
-| MMMU | val | 47.3 | 48.8 | 56.8 | 69.1 |
-| MMStar | test | 58.5 | 61.7 | 57.1 | 63.9 |
-| OCRBench | - | 749.0 | 697.0 | 656.0 | 805.0 |
-| RealWorldQA | test | 68.9 | 66.3 | 61.4 | 58.6 |
-| SeedBench | image | 74.9 | 75.4 | 49.9 | 76.2 |
-| MMBench | en-dev | 81.1 | 83.2 | 81.3 | 83.4 |
-| MMBench | en-test | 80.1 | 80.8 | 75.0 | - |
-| MME | test | 578/1603 | 418/1580 | 517/1409 | - |
-
 We would like to express our gratitude to [Huajie Tan](https://huggingface.co/tanhuajie2001), [Yumeng Wang](https://huggingface.co/devymex), [Yin Xie](https://huggingface.co/Yin-Xie) for their significant contributions to the experimental validation of MLLMs.
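For readers reconstructing the elided evaluation step: the second hunk's context shows that evaluation is driven by lmms-eval (`pip install lmms-eval==0.2.0` followed by `bash eval.sh`). Below is a minimal sketch of what such an `eval.sh` might contain using the lmms-eval 0.2.0 CLI; the checkpoint path, `--model llava` backend name, and task list are illustrative assumptions, not taken from this repository.

```bash
#!/usr/bin/env bash
# Hypothetical eval.sh sketch -- NOT from this repo. Assumes lmms-eval==0.2.0
# (installed above) and a LLaVA-compatible checkpoint; the checkpoint path,
# backend name, and task list below are placeholders.

CKPT="/path/to/MLCD-Embodied-7B"   # placeholder: point at your local checkpoint

# lmms-eval is launched as a Python module; accelerate handles multi-GPU.
accelerate launch --num_processes=8 -m lmms_eval \
    --model llava \
    --model_args pretrained="${CKPT}" \
    --tasks mme,mmbench_en_dev,ocrbench \
    --batch_size 1 \
    --log_samples \
    --log_samples_suffix mlcd_embodied_7b \
    --output_path ./logs/
```

With `--log_samples`, each task writes its scores and per-sample outputs under `./logs/`, which is typically where entries like the MME and MMBench rows in the tables above would be collected from.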