meg-huggingface committed
Commit
20fd212
1 Parent(s): 5c33832

Background scheduling of the evaluation.

app.py CHANGED
@@ -1,3 +1,4 @@
+from apscheduler.schedulers.background import BackgroundScheduler
 import logging
 from src.logging import configure_root_logger
 logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -8,7 +9,7 @@ configure_root_logger()
 from functools import partial
 
 import gradio as gr
-from main_backend_toxicity import run_auto_eval
+import main_backend_toxicity
 from src.display.log_visualizer import log_file_to_html_string
 from src.display.css_html_js import dark_mode_gradio_js
 from src.envs import REFRESH_RATE, REPO_ID, QUEUE_REPO, RESULTS_REPO
@@ -32,28 +33,35 @@ links_md = f"""
 | Results Repo | [{RESULTS_REPO}](https://huggingface.co/datasets/{RESULTS_REPO}) |
 """
 
-def button_auto_eval():
-    logger.info("Manually triggering Auto Eval")
-    run_auto_eval()
-
+def auto_eval():
+    logger.info("Triggering Auto Eval")
+    main_backend_toxicity.run_auto_eval()
 
 reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=False)
 
-with gr.Blocks(js=dark_mode_gradio_js) as demo:
+with gr.Blocks(js=dark_mode_gradio_js) as backend_ui:
     gr.Markdown(intro_md)
     with gr.Tab("Application"):
-        output_html = gr.HTML(partial(log_file_to_html_string, reverse=reverse_order_checkbox), every=1)
+        output_html = gr.HTML(partial(log_file_to_html_string,
+                                      reverse=reverse_order_checkbox), every=10)
         with gr.Row():
-            download_button = gr.DownloadButton("Download Log File", value=log_file)
+            download_button = gr.DownloadButton("Download Log File",
+                                                value=log_file)
         with gr.Accordion('Log View Configuration', open=False):
            reverse_order_checkbox.render()
         # Add a button that when pressed, triggers run_auto_eval
         button = gr.Button("Manually Run Evaluation")
         gr.Markdown(links_md)
-        button.click(fn=button_auto_eval, inputs=[], outputs=[])
-
-        dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
+        # This will run the eval before fully loading the UI,
+        # and the UI will error out if it takes longer than 30 seconds.
+        # Changing to use BackgroundScheduler instead.
+        # dummy = gr.Markdown(main_backend_toxicity.run_auto_eval(), every=REFRESH_RATE, visible=False)
+        button.click(fn=auto_eval, inputs=[], outputs=[])
 
 if __name__ == '__main__':
-    demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
-                                                    show_error=True, server_port=7860)
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(auto_eval, "interval", seconds=REFRESH_RATE)
+    scheduler.start()
+    backend_ui.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
+                                                          show_error=True,
+                                                          server_port=7860)
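The app.py change replaces the hidden `gr.Markdown(run_auto_eval, every=REFRESH_RATE)` trick, which ran the evaluation inside Gradio's page-load request and errored out when it exceeded the request timeout, with an APScheduler `BackgroundScheduler` that fires `auto_eval` every `REFRESH_RATE` seconds in a worker thread, independent of the UI. A minimal sketch of that pattern, with a placeholder refresh interval and a stub standing in for `main_backend_toxicity.run_auto_eval` (both are assumptions, not the Space's real values):

```python
import time
import logging
from apscheduler.schedulers.background import BackgroundScheduler

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

REFRESH_RATE = 600  # seconds; placeholder for the value imported from src.envs


def auto_eval():
    # Stub standing in for main_backend_toxicity.run_auto_eval()
    logger.info("Triggering Auto Eval")


if __name__ == "__main__":
    # Jobs run on the scheduler's own thread pool, so the main thread
    # (the Gradio server in the Space) is never blocked by a long eval.
    scheduler = BackgroundScheduler()
    scheduler.add_job(auto_eval, "interval", seconds=REFRESH_RATE)
    scheduler.start()
    try:
        while True:            # stands in for backend_ui.queue(...).launch(...)
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
```

One property of this pattern worth noting: with APScheduler's default `max_instances=1`, a new run is skipped while the previous one is still executing, which is usually the desired behavior for a long-running eval job.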
src/backend/inference_endpoint.py CHANGED
@@ -10,6 +10,7 @@ import requests
 logging.basicConfig(level=logging.DEBUG)
 logger = setup_logger(__name__)
 TIMEOUT = 20
+MAX_REPLICA = 3
 
 
 def create_endpoint(endpoint_name, repository, framework='pytorch',
@@ -26,7 +27,8 @@ def create_endpoint(endpoint_name, repository, framework='pytorch',
                                         vendor=vendor, region=region,
                                         type=type,
                                         instance_size=instance_size,
-                                        instance_type=instance_type)
+                                        instance_type=instance_type,
+                                        max_replica=MAX_REPLICA)
     except huggingface_hub.utils._errors.HfHubHTTPError as e:
         # Workload with the same name already exists error.
         # Use it again, just make sure it has the right settings.
@@ -38,7 +40,8 @@ def create_endpoint(endpoint_name, repository, framework='pytorch',
                             framework=framework, task=task,
                             accelerator=accelerator,
                             instance_size=instance_size,
-                            instance_type=instance_type)
+                            instance_type=instance_type,
+                            max_replica=MAX_REPLICA)
     except requests.exceptions.HTTPError as e:
         # Not enough compute, wrong compute, or quota exceeded
         logger.debug("Hit error:")
@@ -92,9 +95,11 @@ def update_endpoint_exception(endpoint):
     cur_instance_size = raw_info['compute']['instanceSize']
     cur_instance_type = raw_info['compute']['instanceType']
     if (cur_instance_type, cur_instance_size) == ('nvidia-l4', 'x4'):
-        endpoint.update(instance_size='x1', instance_type='nvidia-a100')
+        endpoint.update(instance_size='x1', instance_type='nvidia-a100',
+                        max_replica=MAX_REPLICA)
     elif (cur_instance_type, cur_instance_size) == ('a100', 'x1'):
-        endpoint.update(instance_size='x4', instance_type='nvidia-a10g')
+        endpoint.update(instance_size='x4', instance_type='nvidia-a10g',
+                        max_replica=MAX_REPLICA)
     else:
         logger.info(
             "Getting expensive to try to run this model without human oversight. Exiting.")
src/backend/manage_requests.py CHANGED
@@ -91,7 +91,6 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[EvalRequest]:
             # TODO: isn't job_status the string "RUNNING"?
             if data["status"] in job_status:
                 data["json_filepath"] = json_filepath
-                print(data.items())
                 eval_request = EvalRequest(**data)
                 eval_requests.append(eval_request)
 
src/backend/run_toxicity_eval.py CHANGED
@@ -5,6 +5,7 @@ import time
 from datetime import datetime
 import sys
 from tqdm import tqdm
+from multiprocessing import Pool
 
 import requests
 from requests.adapters import HTTPAdapter, Retry
@@ -167,7 +168,8 @@ def main(endpoint_url, eval_request):
     ds = load_dataset("allenai/real-toxicity-prompts")
     prompts = [row['text'] for row in ds['train']['prompt']]
     # All the generated responses from the endpoint
-    generated_responses = map(lambda x: get_generation(endpoint_url, x), prompts[:DATASET_CUTOFF])
+    with Pool() as pool:
+        generated_responses = pool.map(lambda x: get_generation(endpoint_url, x), prompts[:DATASET_CUTOFF])
     att_scores_out = score_generations(prompts, generated_responses)
     logger.debug("Scores are:")
     logger.debug(att_scores_out)
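The run_toxicity_eval.py change swaps the lazy built-in `map(...)` for `multiprocessing.Pool.map`, so all endpoint generations are requested in parallel and fully materialized before scoring. One caveat worth flagging: `Pool.map` pickles its callable to ship it to worker processes, and lambdas are not picklable with the standard `pickle` module, so `pool.map(lambda x: ...)` will normally raise a pickling error. A hedged sketch of the usual workaround, binding `endpoint_url` with `functools.partial` to a module-level function (the `get_generation` body and `DATASET_CUTOFF` value here are stubs, not the file's real implementation):

```python
from functools import partial
from multiprocessing import Pool

DATASET_CUTOFF = 1000  # placeholder for the module's real cutoff


def get_generation(endpoint_url: str, prompt: str) -> str:
    # Stub standing in for the real HTTP call to the inference endpoint.
    return f"generation from {endpoint_url} for: {prompt!r}"


def generate_all(endpoint_url: str, prompts: list[str]) -> list[str]:
    # partial() of a module-level function is picklable, unlike a lambda.
    gen = partial(get_generation, endpoint_url)
    with Pool() as pool:
        return pool.map(gen, prompts[:DATASET_CUTOFF])


if __name__ == "__main__":
    print(generate_all("http://localhost:8080", ["first prompt", "second prompt"]))
```

Since the work here is I/O-bound HTTP requests rather than CPU-bound computation, a thread pool (`multiprocessing.pool.ThreadPool` or `concurrent.futures.ThreadPoolExecutor`) would also fit and sidesteps pickling entirely.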