KennyUTC committed
Commit e77f84c
1 Parent(s): fd6c543

[Code] Update leaderboard

Files changed (4):
  1. .gitignore +2 -0
  2. app.py +20 -3
  3. gen_table.py +19 -7
  4. meta_data.py +4 -2
.gitignore ADDED
@@ -0,0 +1,2 @@
+*ipynb
+__pycache__
app.py CHANGED
@@ -1,11 +1,24 @@
 import abc
-
 import gradio as gr
 
 from gen_table import *
 from meta_data import *
 
-with gr.Blocks() as demo:
+# import pandas as pd
+# pd.set_option('display.max_colwidth', 0)
+
+head_style = """
+<style>
+@media (min-width: 1536px)
+{
+    .gradio-container {
+        min-width: var(--size-full) !important;
+    }
+}
+</style>
+"""
+
+with gr.Blocks(title="Open VLM Leaderboard", head=head_style) as demo:
     struct = load_results()
     timestamp = struct['time']
     EVAL_TIME = format_timestamp(timestamp)
@@ -55,10 +68,11 @@ with gr.Blocks() as demo:
                 type='pandas',
                 datatype=[type_map[x] for x in headers],
                 interactive=False,
+                wrap=True,
                 visible=True)
 
     def filter_df(fields, model_size, model_type):
-        filter_list = ['Avg Score', 'Avg Rank', 'OpenSource', 'Verified']
+        filter_list = ['Avg Score', 'Avg Rank', 'OpenSource']
         headers = ['Rank'] + check_box['essential'] + fields
 
         new_fields = [field for field in fields if field not in filter_list]
@@ -78,6 +92,7 @@ with gr.Blocks() as demo:
                 type='pandas',
                 datatype=[type_map[x] for x in headers],
                 interactive=False,
+                wrap=True,
                 visible=True)
         return comp
 
@@ -124,6 +139,7 @@ with gr.Blocks() as demo:
                 type='pandas',
                 datatype=[s.type_map[x] for x in s.headers],
                 interactive=False,
+                wrap=True,
                 visible=True)
             s.dataset = gr.Textbox(value=dataset, label=dataset, visible=False)
 
@@ -145,6 +161,7 @@ with gr.Blocks() as demo:
                 type='pandas',
                 datatype=[s.type_map[x] for x in headers],
                 interactive=False,
+                wrap=True,
                 visible=True)
         return comp
 
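For context on the two Gradio features this change adopts: `gr.Blocks(head=...)` injects raw HTML (here a CSS override) into the page's <head>, and `wrap=True` makes long cell text wrap inside a `gr.DataFrame` instead of being truncated. A minimal, self-contained sketch, assuming a recent Gradio 4.x install (the demo data is hypothetical):

import gradio as gr

# Minimal sketch of the two features used above (hypothetical demo data):
# `head=` injects raw HTML/CSS into <head>; `wrap=True` wraps long cell text.
head_style = """
<style>
@media (min-width: 1536px) { .gradio-container { min-width: var(--size-full) !important; } }
</style>
"""

with gr.Blocks(title="Demo Leaderboard", head=head_style) as demo:
    gr.DataFrame(
        value=[['<a href="https://example.com">SomeModel</a>', 61.2]],
        headers=['Method', 'Avg Score'],
        datatype=['html', 'number'],
        interactive=False,
        wrap=True)

if __name__ == '__main__':
    demo.launch()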
gen_table.py CHANGED
@@ -54,16 +54,14 @@ def model_size_flag(sz, FIELDS):
 def model_type_flag(line, FIELDS):
     if 'OpenSource' in FIELDS and line['OpenSource'] == 'Yes':
         return True
-    if 'API' in FIELDS and line['OpenSource'] == 'No' and line['Verified'] == 'Yes':
-        return True
-    if 'Proprietary' in FIELDS and line['OpenSource'] == 'No' and line['Verified'] == 'No':
+    if 'API' in FIELDS and line['OpenSource'] == 'No':
         return True
     return False
 
 
 def BUILD_L1_DF(results, fields):
     check_box = {}
-    check_box['essential'] = ['Method', 'Param (B)', 'Language Model', 'Vision Model']
+    check_box['essential'] = ['Method', 'Param (B)', 'Language Model', 'Vision Model', 'Eval Date']
     # revise there to set default dataset
     check_box['required'] = ['Avg Score', 'Avg Rank'] + DEFAULT_BENCH
     check_box['avg'] = ['Avg Score', 'Avg Rank']
@@ -71,7 +69,8 @@ def BUILD_L1_DF(results, fields):
     type_map = defaultdict(lambda: 'number')
     type_map['Method'] = 'html'
     type_map['Language Model'] = type_map['Vision Model'] = 'html'
-    type_map['OpenSource'] = type_map['Verified'] = 'str'
+    type_map['OpenSource'] = 'str'
+    type_map['Eval Date'] = 'str'
     check_box['type_map'] = type_map
 
     df = generate_table(results, fields)
@@ -105,6 +104,12 @@ def BUILD_L2_DF(results, dataset):
         elif k == 'Method':
             name, url = meta['Method']
             res[k].append(f'<a href="{url}">{name}</a>')
+        elif k == 'Eval Date':
+            eval_date = meta['Time'].split('/')
+            assert len(eval_date) == 3
+            eval_date = [x if len(x) > 1 else '0' + x for x in eval_date]
+            eval_date = '/'.join(eval_date)
+            res[k].append(eval_date)
         else:
             res[k].append(meta[k])
     fields = [x for x in fields]
@@ -128,13 +133,14 @@ def BUILD_L2_DF(results, dataset):
     df = df.iloc[::-1]
 
     check_box = {}
-    check_box['essential'] = ['Method', 'Param (B)', 'Language Model', 'Vision Model']
+    check_box['essential'] = ['Method', 'Param (B)', 'Language Model', 'Vision Model', 'Eval Date']
     check_box['required'] = required_fields
     check_box['all'] = all_fields
     type_map = defaultdict(lambda: 'number')
     type_map['Method'] = 'html'
     type_map['Language Model'] = type_map['Vision Model'] = 'html'
-    type_map['OpenSource'] = type_map['Verified'] = 'str'
+    type_map['OpenSource'] = 'str'
+    type_map['Eval Date'] = 'str'
     check_box['type_map'] = type_map
     return df, check_box
 
@@ -159,6 +165,12 @@ def generate_table(results, fields):
             name, url = meta['Method']
             res[k].append(f'<a href="{url}">{name}</a>')
             res['name'].append(name)
+        elif k == 'Eval Date':
+            eval_date = meta['Time'].split('/')
+            assert len(eval_date) == 3
+            eval_date = [x if len(x) > 1 else '0' + x for x in eval_date]
+            eval_date = '/'.join(eval_date)
+            res[k].append(eval_date)
         else:
             res[k].append(meta[k])
     scores, ranks = [], []
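The `Eval Date` branch added in both `BUILD_L2_DF` and `generate_table` zero-pads each component of `meta['Time']` so dates render uniformly. A quick illustration of the same transformation (the input value is hypothetical):

# Hypothetical meta['Time'] value in 'year/month/day' form.
time_str = '2024/3/5'
eval_date = time_str.split('/')
assert len(eval_date) == 3
# Left-pad single-digit components with '0': '3' -> '03', '5' -> '05'.
eval_date = [x if len(x) > 1 else '0' + x for x in eval_date]
print('/'.join(eval_date))  # -> 2024/03/05

The padding matters because the column is typed 'str', so ordering is lexicographic: '2024/03/05' sorts before '2024/10/01', while the unpadded '2024/3/5' would sort after '2024/10/1'.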
meta_data.py CHANGED
@@ -20,7 +20,9 @@ This leaderboard was last updated: {}.
 OpenVLM Leaderboard only includes open-source VLMs or API models that are publicly available. To add your own model to the leaderboard, please create a PR in [VLMEvalKit](https://github.com/open-compass/VLMEvalKit) to support your VLM and then we will help with the evaluation and updating the leaderboard. For any questions or concerns, please feel free to contact us at [opencompass, duanhaodong]@pjlab.org.cn.
 """
 # CONSTANTS-FIELDS
-META_FIELDS = ['Method', 'Param (B)', 'Language Model', 'Vision Model', 'OpenSource', 'Verified']
+META_FIELDS = [
+    'Method', 'Param (B)', 'Language Model', 'Vision Model', 'OpenSource', 'Eval Date'
+]
 MAIN_FIELDS = [
     'MMBench_V11', 'MMStar', 'MME',
     'MMMU_VAL', 'MathVista', 'OCRBench', 'AI2D',
@@ -34,7 +36,7 @@ DEFAULT_BENCH = [
 ]
 MMBENCH_FIELDS = ['MMBench_TEST_EN_V11', 'MMBench_TEST_CN_V11', 'MMBench_TEST_EN', 'MMBench_TEST_CN', 'CCBench']
 MODEL_SIZE = ['<4B', '4B-10B', '10B-20B', '20B-40B', '>40B', 'Unknown']
-MODEL_TYPE = ['API', 'OpenSource', 'Proprietary']
+MODEL_TYPE = ['API', 'OpenSource']
 
 # The README file for each benchmark
 LEADERBOARD_MD = {}
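For reference, the trimmed `MODEL_TYPE` list pairs with the simplified `model_type_flag` in gen_table.py: with the `Verified` field gone, every non-open-source model now counts as 'API'. A small self-contained sketch of the resulting filter (the sample rows are hypothetical):

MODEL_TYPE = ['API', 'OpenSource']

def model_type_flag(line, FIELDS):
    # Keep a row when its OpenSource flag matches one of the selected types.
    if 'OpenSource' in FIELDS and line['OpenSource'] == 'Yes':
        return True
    if 'API' in FIELDS and line['OpenSource'] == 'No':
        return True
    return False

rows = [{'Method': 'SomeAPIModel', 'OpenSource': 'No'},   # hypothetical rows
        {'Method': 'SomeOpenModel', 'OpenSource': 'Yes'}]
print([r['Method'] for r in rows if model_type_flag(r, ['API'])])
# -> ['SomeAPIModel']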