Initial leaderboard
Browse files
- .gitignore +3 -0
- README.md +1 -1
- app.py +13 -3
- leaderboard.csv +15 -0
- requirements.txt +2 -2
- templates/index.html +98 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.idea/
|
2 |
+
__pycache__/*
|
3 |
+
copy_data.sh
|
README.md
CHANGED
@@ -8,4 +8,4 @@ pinned: false
|
|
8 |
license: mit
|
9 |
---
|
10 |
|
11 |
-
|
|
|
8 |
license: mit
|
9 |
---
|
10 |
|
11 |
+
# Stick To Your Role! Leaderboard
|
app.py
CHANGED
@@ -1,9 +1,19 @@
|
|
1 |
-
from flask import Flask
|
|
|
|
|
2 |
app = Flask(__name__)
|
3 |
|
4 |
@app.route('/')
|
5 |
-
def
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
if __name__ == '__main__':
|
9 |
app.run(host='0.0.0.0', port=7860, debug=True)
|
|
|
1 |
+
from flask import Flask, render_template
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
app = Flask(__name__)
|
5 |
|
6 |
@app.route('/')
def index():
    """Serve the leaderboard page.

    Reads ``leaderboard.csv``, prepends an empty ``Rank`` column and hands
    the table, rendered as HTML, to the ``index.html`` template under the
    name ``table_html``.
    """
    leaderboard = pd.read_csv('leaderboard.csv')

    # Rank is only a blank placeholder column on the server side.
    leaderboard.insert(0, 'Rank', '')

    return render_template(
        'index.html',
        table_html=leaderboard.to_html(
            classes='table table-striped table-bordered',
            index=False,
        ),
    )
|
17 |
|
18 |
if __name__ == '__main__':
    import os

    # The server listens on every interface, so the interactive Werkzeug
    # debugger (which allows code execution from the browser) must not be
    # on by default. Opt in explicitly with FLASK_DEBUG=1 when developing.
    app.run(host='0.0.0.0', port=7860,
            debug=os.environ.get('FLASK_DEBUG') == '1')
|
leaderboard.csv
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Model,Score (Cardinal),Win rate (Ordinal),Rank-Order Stability,Rank_distance,CFI,SRMR,RMSEA,Cronbach_alpha
|
2 |
+
phi-3-mini-128k-instruct,0.18707102480796897,0.258309591642925,0.16880341880341881,0.7264957264957265,0.32478632478632474,0.5555555555555556,0.6239316239316239,0.23076923076923084
|
3 |
+
phi-3-medium-128k-instruct,0.18789267301588508,0.28490028490028496,0.26282051282051283,0.7521367521367521,0.2820512820512821,0.7692307692307692,0.7606837606837606,0.5128205128205128
|
4 |
+
Mistral-7B-Instruct-v0.1,0.23063750442486428,0.3418803418803419,0.12393162393162394,0.4786324786324786,0.49572649572649574,0.3076923076923077,0.2222222222222222,0.09401709401709403
|
5 |
+
Mistral-7B-Instruct-v0.2,0.1747138068267554,0.3342830009496676,0.41666666666666674,0.4358974358974358,0.1282051282051282,0.829059829059829,0.8205128205128205,0.2991452991452992
|
6 |
+
Mistral-7B-Instruct-v0.3,0.20131219867252867,0.3133903133903134,0.23504273504273498,0.4273504273504273,0.28205128205128205,0.6324786324786325,0.5897435897435896,0.24786324786324787
|
7 |
+
Mixtral-8x7B-Instruct-v0.1,0.2437400779497571,0.43114909781576455,0.5811965811965812,0.641025641025641,0.2136752136752137,0.7863247863247863,0.7521367521367521,0.5213675213675214
|
8 |
+
Mixtral-8x22B-Instruct-v0.1,0.18791617935864172,0.29629629629629634,0.37820512820512825,0.6837606837606838,0.1794871794871795,0.9230769230769231,0.9145299145299145,0.49572649572649574
|
9 |
+
command_r_plus,0.3737946817620246,0.560303893637227,0.6880341880341879,0.6923076923076923,0.45299145299145294,0.5128205128205128,0.5811965811965811,0.6239316239316239
|
10 |
+
llama_3_8b_instruct,0.28828624999947805,0.4691358024691358,0.5747863247863247,0.5470085470085471,0.3162393162393162,0.7008547008547008,0.6923076923076923,0.5470085470085471
|
11 |
+
llama_3_70b_instruct,0.5976823900754995,0.7701804368471036,0.9380341880341881,0.7264957264957265,0.8376068376068376,0.4273504273504274,0.49572649572649574,0.9914529914529915
|
12 |
+
Qwen2-7B-Instruct,0.3400513233761655,0.5251661918328584,0.5769230769230768,0.5811965811965811,0.4188034188034188,0.45299145299145294,0.4871794871794872,0.5213675213675214
|
13 |
+
Qwen2-72B-Instruct,0.42123592516768155,0.5906932573599241,0.9658119658119655,0.5811965811965811,0.07692307692307693,0.9658119658119658,0.9914529914529915,0.9145299145299146
|
14 |
+
gpt-3.5-turbo-0125,0.14920836189480854,0.23741690408357075,0.24145299145299137,0.7777777777777778,0.1965811965811966,0.717948717948718,0.7094017094017093,0.1794871794871795
|
15 |
+
gpt-4o-0513,0.5383734693976642,0.7340930674264008,0.8482905982905984,0.6666666666666667,0.811965811965812,0.41025641025641024,0.3418803418803419,0.8205128205128205
|
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
flask
|
2 |
-
|
|
|
1 |
+
flask==3.0.3
|
2 |
+
pandas
|
templates/index.html
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>Stick To Your Role! Leaderboard</title>
|
7 |
+
<!-- Include Bootstrap CSS for styling -->
|
8 |
+
<!-- Bootstrap 5.1.3 stylesheet from the CDN location given in the Bootstrap docs (jsDelivr) -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css">
|
9 |
+
<!-- Include DataTables CSS -->
|
10 |
+
<link rel="stylesheet" href="https://cdn.datatables.net/1.11.5/css/dataTables.bootstrap5.min.css">
|
11 |
+
<!-- Custom CSS for additional styling -->
|
12 |
+
<style>
|
13 |
+
body {
|
14 |
+
background-color: #f8f9fa;
|
15 |
+
font-family: 'Arial', sans-serif;
|
16 |
+
}
|
17 |
+
.container {
|
18 |
+
margin-top: 50px;
|
19 |
+
background: #fff;
|
20 |
+
padding: 20px;
|
21 |
+
border-radius: 8px;
|
22 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
|
23 |
+
}
|
24 |
+
h1 {
|
25 |
+
color: #333;
|
26 |
+
text-align: center;
|
27 |
+
}
|
28 |
+
.table-responsive {
|
29 |
+
margin-top: 20px;
|
30 |
+
}
|
31 |
+
table {
|
32 |
+
border-collapse: separate;
|
33 |
+
border-spacing: 0 10px;
|
34 |
+
}
|
35 |
+
table thead th {
|
36 |
+
background-color: #007bff;
|
37 |
+
color: white;
|
38 |
+
border: none;
|
39 |
+
}
|
40 |
+
table tbody tr {
|
41 |
+
background-color: #fff;
|
42 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
43 |
+
}
|
44 |
+
table tbody tr:hover {
|
45 |
+
background-color: #f1f1f1;
|
46 |
+
}
|
47 |
+
table td, table th {
|
48 |
+
padding: 12px 15px;
|
49 |
+
border: none;
|
50 |
+
}
|
51 |
+
table th:first-child, table td:first-child {
|
52 |
+
border-top-left-radius: 8px;
|
53 |
+
border-bottom-left-radius: 8px;
|
54 |
+
}
|
55 |
+
table th:last-child, table td:last-child {
|
56 |
+
border-top-right-radius: 8px;
|
57 |
+
border-bottom-right-radius: 8px;
|
58 |
+
}
|
59 |
+
</style>
|
60 |
+
</head>
|
61 |
+
<body>
|
62 |
+
<div class="container">
|
63 |
+
<h1 class="mt-5 text-center">Stick To Your Role! Leaderboard</h1>
|
64 |
+
<div class="table-responsive">
|
65 |
+
<!-- Render the table HTML here -->
|
66 |
+
{{ table_html|safe }}
|
67 |
+
</div>
|
68 |
+
</div>
|
69 |
+
|
70 |
+
<!-- Include jQuery -->
|
71 |
+
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
72 |
+
<!-- Include Bootstrap JS -->
|
73 |
+
<!-- Bootstrap 5.1.3 bundle (includes Popper) from the CDN location given in the Bootstrap docs (jsDelivr) -->
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js"></script>
|
74 |
+
<!-- Include DataTables JS -->
|
75 |
+
<script src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
|
76 |
+
<script src="https://cdn.datatables.net/1.11.5/js/dataTables.bootstrap5.min.js"></script>
|
77 |
+
<!-- Initialize DataTables -->
|
78 |
+
<script>
|
79 |
+
$(document).ready(function() {
    // Column layout of the rendered table: 0 = Rank (blank placeholder),
    // 1 = Model, 2 = Score (Cardinal), then the remaining metric columns.
    $('table').DataTable({
        paging: false,
        info: false,
        // Rank is derived from the current ordering, so the column itself
        // is neither sortable nor searchable.
        columnDefs: [
            { orderable: false, searchable: false, targets: 0 }
        ],
        // Start with the highest score first so that rank 1 is the top
        // scoring model rather than the alphabetically first one.
        order: [[2, 'desc']],
        // Renumber the Rank column after every draw so it follows the
        // currently applied sort order.
        drawCallback: function() {
            this.api().column(0, { order: 'applied' }).nodes().each(function(cell, i) {
                cell.textContent = i + 1;
            });
        }
    });
});
|
96 |
+
</script>
|
97 |
+
</body>
|
98 |
+
</html>
|