import logging
import pprint

from huggingface_hub import snapshot_download

logging.getLogger("openai").setLevel(logging.WARNING)

from src.backend.run_eval_suite_lighteval import run_evaluation
from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request, set_requests_seen
from src.backend.sort_queue import sort_models_by_priority

from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, API, LIMIT, TOKEN, ACCELERATOR, VENDOR, REGION
from src.about import TASKS_LIGHTEVAL

logging.basicConfig(level=logging.ERROR)
pp = pprint.PrettyPrinter(width=80)

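# Lifecycle statuses an eval request can move through in the queue dataset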
PENDING_STATUS = "PENDING"
RUNNING_STATUS = "RUNNING"
FINISHED_STATUS = "FINISHED"
FAILED_STATUS = "FAILED"
REJECTED_STATUS = "REJECTED"

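# Download the current results and request-queue datasets from the Hub into the local backend cache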
snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)

def run_auto_eval():
    current_pending_status = [PENDING_STATUS]

    # pull the eval dataset from the hub and parse any eval requests
    # check completed evals and set them to finished
    check_completed_evals(
        api=API,
        checked_status=RUNNING_STATUS,
        completed_status=FINISHED_STATUS,
        failed_status=FAILED_STATUS,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
        hf_repo_results=RESULTS_REPO,
        local_dir_results=EVAL_RESULTS_PATH_BACKEND
    )

    # Get all eval requests that are PENDING; change this parameter to run evals in other states
    eval_requests, requests_seen = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
    # Sort the evals by priority (first submitted, first run)
    eval_requests = sort_models_by_priority(api=API, models=eval_requests)

    print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")

    if len(eval_requests) == 0:
        return

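    # Handle only the highest-priority (oldest) pending request on each run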
    eval_request = eval_requests[0]
    pp.pprint(eval_request)

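    # Estimate the effective model size; 4-bit quantized models are counted at half their parameter count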
    params_size = eval_request.params
    if eval_request.precision == '4bit':
        params_size //= 2
    
    # Choose a GPU instance size/type and the monthly request cap for that tier based on model size
    if not eval_request or params_size < 0:
        raise ValueError("Couldn't detect number of params, please make sure the metadata is available")
    elif params_size < 4:
        instance_size, instance_type, cap = "x1", "nvidia-t4", 40
    elif params_size < 8:
        instance_size, instance_type, cap = "x1", "nvidia-a10g", 40
    elif params_size < 30:
        instance_size, instance_type, cap = "x4", "nvidia-a10g", 20
    elif params_size < 45:
        instance_size, instance_type, cap = "x2", "nvidia-a100", 5
    elif params_size < 80:
        instance_size, instance_type, cap = "x4", "nvidia-a100", 5
    else: 
        set_eval_request(
            api=API,
            eval_request=eval_request,
            set_to_status=REJECTED_STATUS,
            hf_repo=QUEUE_REPO,
            local_dir=EVAL_REQUESTS_PATH_BACKEND,
        )
        pp.pprint(dict(message="Number of params too big, can't run this model", params=eval_request.params))
        return
    
    # Skip the monthly counters for submissions from admins (members of the hebrew-llm-leaderboard org)
    is_admin = any(org['name'] == 'hebrew-llm-leaderboard' for org in eval_request.user_info.get('orgs', []))
    if not is_admin:
        counter_key = f'count_{instance_size}_{instance_type}'
        if counter_key not in requests_seen:
            requests_seen[counter_key] = 0
        if requests_seen[counter_key] >= cap:
            set_eval_request(
                api=API,
                eval_request=eval_request,
                set_to_status=REJECTED_STATUS,
                hf_repo=QUEUE_REPO,
                local_dir=EVAL_REQUESTS_PATH_BACKEND,
            )
            pp.pprint(dict(message="Reached maximum cap for requests of this instance type this month", counter=counter_key, instance_type=instance_type, cap=cap))
            return

        # Next, check the user that submitted the request - allow up to 4 requests per user per month
        user = eval_request.user_info['name']
        if user in requests_seen and len(requests_seen[user]) >= 4:
            set_eval_request(
                api=API,
                eval_request=eval_request,
                set_to_status=REJECTED_STATUS,
                hf_repo=QUEUE_REPO,
                local_dir=EVAL_REQUESTS_PATH_BACKEND,
            )
            pp.pprint(dict(message="Reached maximum cap for requests for this user this month", counter=counter_key, user=user))
            return
        if user not in requests_seen:
            requests_seen[user] = []
        
        requests_seen[user].append(dict(model_id=eval_request.model, revision=eval_request.revision))
        requests_seen[counter_key] += 1
        set_requests_seen(
            api=API,
            requests_seen=requests_seen,
            hf_repo=QUEUE_REPO,
            local_dir=EVAL_REQUESTS_PATH_BACKEND
        )
        # end of counters check for non-admins

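    # Mark the request as RUNNING in the queue repo, then launch the evaluation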
    set_eval_request(
        api=API,
        eval_request=eval_request,
        set_to_status=RUNNING_STATUS,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
    )
    

    run_evaluation(
        eval_request=eval_request,
        task_names=TASKS_LIGHTEVAL,
        local_dir=EVAL_RESULTS_PATH_BACKEND,
        batch_size=25,
        accelerator=ACCELERATOR,
        region=REGION,
        vendor=VENDOR,
        instance_size=instance_size,
        instance_type=instance_type,
        limit=LIMIT,
    )


if __name__ == "__main__":
    run_auto_eval()