text
Browse files- app.py +2 -2
- src/about.py +13 -2
- src/display/utils.py +5 -2
app.py
CHANGED
@@ -76,7 +76,7 @@ def init_leaderboard(dataframe):
|
|
76 |
ColumnFilter(
|
77 |
AutoEvalColumn.params.name,
|
78 |
type="slider",
|
79 |
-
min=0.
|
80 |
max=150,
|
81 |
label="Select the number of parameters (B)",
|
82 |
),
|
@@ -95,7 +95,7 @@ with demo:
|
|
95 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
96 |
|
97 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
98 |
-
with gr.TabItem("๐
|
99 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
100 |
|
101 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
|
|
76 |
ColumnFilter(
|
77 |
AutoEvalColumn.params.name,
|
78 |
type="slider",
|
79 |
+
min=0.00,
|
80 |
max=150,
|
81 |
label="Select the number of parameters (B)",
|
82 |
),
|
|
|
95 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
96 |
|
97 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
98 |
+
with gr.TabItem("🏅 Pinocchio ITA Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
99 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
100 |
|
101 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
src/about.py
CHANGED
@@ -27,11 +27,22 @@ NUM_FEWSHOT = 0 # Change with your few shot
|
|
27 |
|
28 |
|
29 |
# Your leaderboard name
|
30 |
-
TITLE = """<h1 align="center" id="space-title"
|
31 |
|
32 |
# What does your leaderboard evaluate?
|
33 |
INTRODUCTION_TEXT = """
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
"""
|
36 |
|
37 |
# Which evaluations are you running? how can people reproduce what you have?
|
|
|
27 |
|
28 |
|
29 |
# Your leaderboard name
|
30 |
+
TITLE = """<h1 align="center" id="space-title">🇮🇹 Pinocchio ITA leaderboard from <a href="https://mii-llm.ai">mii-llm</a>🇮🇹</h1>"""
|
31 |
|
32 |
# What does your leaderboard evaluate?
|
33 |
INTRODUCTION_TEXT = """
|
34 |
+
Pinocchio ITA leaderboard is an effort from <a href="https://mii-llm.ai">mii-llm lab</a> of creating specialized evaluations and models on Italian subjects.
|
35 |
+
We also released the <a href="https://huggingface.co/datasets/mii-llm/pinocchio">Pinocchio dataset</a> a multimodal evaluation dataset for Italian.
|
36 |
+
A PR on lm-evaluation-harness is open at the following link if you want to evaluate on your own hardware.
|
37 |
+
The open source models are evaluated on the following subjects on Pinocchio tasks:
|
38 |
+
<ul>
|
39 |
+
<li>Generale</li>
|
40 |
+
<li>Logica</li>
|
41 |
+
<li>Lingua straniera</li>
|
42 |
+
<li>Matematica e scienze</li>
|
43 |
+
<li>Diritto</li>
|
44 |
+
<li>Cultura</li>
|
45 |
+
</ul>
|
46 |
"""
|
47 |
|
48 |
# Which evaluations are you running? how can people reproduce what you have?
|
src/display/utils.py
CHANGED
@@ -64,7 +64,8 @@ class ModelDetails:
|
|
64 |
class ModelType(Enum):
|
65 |
PT = ModelDetails(name="pretrained", symbol="🟢")
|
66 |
FT = ModelDetails(name="fine-tuned", symbol="🔶")
|
67 |
-
IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
|
|
|
68 |
RL = ModelDetails(name="RL-tuned", symbol="🟦")
|
69 |
Unknown = ModelDetails(name="", symbol="?")
|
70 |
|
@@ -79,8 +80,10 @@ class ModelType(Enum):
|
|
79 |
return ModelType.PT
|
80 |
if "RL-tuned" in type or "🟦" in type:
|
81 |
return ModelType.RL
|
82 |
-
if "instruction-tuned" in type or "⭕" in type:
|
83 |
return ModelType.IFT
|
|
|
|
|
84 |
return ModelType.Unknown
|
85 |
|
86 |
class WeightType(Enum):
|
|
|
64 |
class ModelType(Enum):
|
65 |
PT = ModelDetails(name="pretrained", symbol="🟢")
|
66 |
FT = ModelDetails(name="fine-tuned", symbol="🔶")
|
67 |
+
#IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
|
68 |
+
IFT = ModelDetails(name="merged", symbol="⭕")
|
69 |
RL = ModelDetails(name="RL-tuned", symbol="🟦")
|
70 |
Unknown = ModelDetails(name="", symbol="?")
|
71 |
|
|
|
80 |
return ModelType.PT
|
81 |
if "RL-tuned" in type or "🟦" in type:
|
82 |
return ModelType.RL
|
83 |
+
if "merged" in type or "⭕" in type:
|
84 |
return ModelType.IFT
|
85 |
+
#if "instruction-tuned" in type or "⭕" in type:
|
86 |
+
# return ModelType.IFT
|
87 |
return ModelType.Unknown
|
88 |
|
89 |
class WeightType(Enum):
|