Spaces:
Paused
Paused
Updated backend to use newer version of lighteval
Browse files
- requirements.txt +1 -1
- src/backend/run_eval_suite_lighteval.py +24 -11
requirements.txt
CHANGED
@@ -14,7 +14,7 @@ tqdm==4.65.0
|
|
14 |
transformers
|
15 |
tokenizers>=0.15.0
|
16 |
# git+https://github.com/huggingface/lighteval.git#egg=lighteval
|
17 |
-
git+https://github.com/shaltielshmid/lighteval.git@increase-tgi-container#egg=lighteval
|
18 |
accelerate==0.24.1
|
19 |
sentencepiece
|
20 |
Levenshtein
|
|
|
14 |
transformers
|
15 |
tokenizers>=0.15.0
|
16 |
# git+https://github.com/huggingface/lighteval.git#egg=lighteval
|
17 |
+
git+https://github.com/shaltielshmid/lighteval.git@increase-tgi-container-1.5#egg=lighteval
|
18 |
accelerate==0.24.1
|
19 |
sentencepiece
|
20 |
Levenshtein
|
src/backend/run_eval_suite_lighteval.py
CHANGED
@@ -21,14 +21,29 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
|
|
21 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
22 |
|
23 |
args = DefaultNamespace(**{
|
24 |
-
"
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
"max_samples": limit,
|
33 |
"job_id": str(datetime.now()),
|
34 |
"push_results_to_hub": True,
|
@@ -42,9 +57,7 @@ def run_evaluation(eval_request: EvalRequest, task_names: str, batch_size: int,
|
|
42 |
"custom_tasks": "custom_tasks.py",
|
43 |
"tasks": task_names,
|
44 |
"dataset_loading_processes": 24,
|
45 |
-
"num_fewshot_seeds": 0,
|
46 |
-
"reuse_existing": False,
|
47 |
-
"namespace": OWNER
|
48 |
})
|
49 |
|
50 |
try:
|
|
|
21 |
print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
|
22 |
|
23 |
args = DefaultNamespace(**{
|
24 |
+
"model_config": dict(model=dict(
|
25 |
+
type="endpoint",
|
26 |
+
base_params=dict(
|
27 |
+
endpoint_name=f'{eval_request.model.split("/")[1].replace(".", "-").lower()}-lighteval'[-32:],
|
28 |
+
model=eval_request.model,
|
29 |
+
revision=eval_request.revision,
|
30 |
+
dtype=eval_request.precision,
|
31 |
+
reuse_existing=False
|
32 |
+
),
|
33 |
+
instance=dict(
|
34 |
+
accelerator=accelerator,
|
35 |
+
region=region,
|
36 |
+
vendor=vendor,
|
37 |
+
instance_size=instance_size,
|
38 |
+
instance_type=instance_type,
|
39 |
+
framework='pytorch',
|
40 |
+
endpoint_type='protected',
|
41 |
+
namespace=OWNER
|
42 |
+
),
|
43 |
+
generation=dict(
|
44 |
+
add_special_tokens=True
|
45 |
+
)
|
46 |
+
)),
|
47 |
"max_samples": limit,
|
48 |
"job_id": str(datetime.now()),
|
49 |
"push_results_to_hub": True,
|
|
|
57 |
"custom_tasks": "custom_tasks.py",
|
58 |
"tasks": task_names,
|
59 |
"dataset_loading_processes": 24,
|
60 |
+
"num_fewshot_seeds": 0
|
|
|
|
|
61 |
})
|
62 |
|
63 |
try:
|