meg-huggingface committed
Commit 7dd405e
1 Parent(s): 5169c06

Inference endpoints and parallelism.

app.py CHANGED
@@ -1,9 +1,5 @@
  from apscheduler.schedulers.background import BackgroundScheduler
- import logging
  from src.logging import configure_root_logger
- logging.getLogger("httpx").setLevel(logging.WARNING)
- logging.getLogger("numexpr").setLevel(logging.WARNING)
- logging.getLogger("absl").setLevel(logging.WARNING)
  configure_root_logger()
 
  from functools import partial
@@ -15,7 +11,6 @@ from src.display.css_html_js import dark_mode_gradio_js
  from src.envs import REFRESH_RATE, REPO_ID, QUEUE_REPO, RESULTS_REPO
  from src.logging import setup_logger, log_file
 
- logging.basicConfig(level=logging.INFO)
  logger = setup_logger(__name__)
 
  intro_md = f"""
@@ -37,7 +32,7 @@ def auto_eval():
  logger.info("Triggering Auto Eval")
  main_backend_toxicity.run_auto_eval()
 
- reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=False)
+ reverse_order_checkbox = gr.Checkbox(label="Reverse Order", value=True)
 
  with gr.Blocks(js=dark_mode_gradio_js) as backend_ui:
  gr.Markdown(intro_md)
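
The app.py change drops the inline logging setup in favor of the shared configure_root_logger() from src.logging. That helper's implementation is not part of this diff; below is a minimal sketch, assuming it simply absorbs the basicConfig call and the per-library levels removed above (only the function name comes from the repo, the body is hypothetical).

# Hypothetical sketch of a centralized root-logger setup; assumes it takes
# over the logging.basicConfig call and the httpx/numexpr/absl levels that
# app.py used to set inline. The real src/logging.py may differ.
import logging


def configure_root_logger() -> None:
    # One global format/level instead of per-file basicConfig calls.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(name)s %(levelname)s %(message)s",
    )
    # Quiet the chatty third-party loggers previously silenced in app.py.
    for noisy in ("httpx", "numexpr", "absl"):
        logging.getLogger(noisy).setLevel(logging.WARNING)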
main_backend_toxicity.py CHANGED
@@ -1,4 +1,3 @@
- import logging
  import pprint
  import re
  from huggingface_hub import snapshot_download, delete_inference_endpoint
@@ -13,10 +12,8 @@ from src.envs import (QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO,
  #, LIMIT, ACCELERATOR, VENDOR, REGION
  from src.logging import setup_logger
 
- logging.getLogger("openai").setLevel(logging.DEBUG)
  logger = setup_logger(__name__)
 
- # logging.basicConfig(level=logging.ERROR)
  pp = pprint.PrettyPrinter(width=80)
 
  PENDING_STATUS = "PENDING"
@@ -72,8 +69,8 @@ def run_auto_eval():
  endpoint_url = create_endpoint(endpoint_name, model_repository)
  logger.info("Created an endpoint url at %s" % endpoint_url)
  results = main(endpoint_url, eval_request)
- logger.debug("FINISHED!")
- logger.debug(results)
+ logger.info("FINISHED!")
+ logger.info(results)
  logger.info(f'Completed Evaluation of {eval_request.json_filepath}')
  set_eval_request(api=API,
  eval_request=eval_request,
src/backend/inference_endpoint.py CHANGED
@@ -1,13 +1,10 @@
  import sys
- import huggingface_hub.utils._errors
  from time import sleep
- import logging
  from huggingface_hub import create_inference_endpoint, get_inference_endpoint
  from src.backend.run_toxicity_eval import get_generation
  from src.logging import setup_logger
  import requests
 
- logging.basicConfig(level=logging.DEBUG)
  logger = setup_logger(__name__)
  TIMEOUT = 20
  MAX_REPLICA = 1
@@ -18,8 +15,13 @@ def create_endpoint(endpoint_name, repository, framework='pytorch',
  region='us-east-1', type='protected', instance_size='x4',
  instance_type='nvidia-l4'):
  logger.info("Creating endpoint %s..." % endpoint_name)
- # TODO(mm): Handle situation where it's paused
+ # Useful in debugging: Is it already there?
  try:
+ endpoint = get_inference_endpoint(endpoint_name)
+ have_endpoint = True
+ except requests.exceptions.HTTPError:
+ have_endpoint = False
+ if not have_endpoint:
  endpoint = create_inference_endpoint(endpoint_name,
  repository=repository,
  framework=framework, task=task,
@@ -29,65 +31,39 @@ def create_endpoint(endpoint_name, repository, framework='pytorch',
  instance_size=instance_size,
  instance_type=instance_type,
  max_replica=MAX_REPLICA)
- except huggingface_hub.utils._errors.HfHubHTTPError as e:
- # Workload with the same name already exists error.
- # Use it again, just make sure it has the right settings.
- # TODO(mm): Is this error even catching?
- logger.debug("Hit error:")
- logger.debug(e)
- logger.debug("Attempting to update with the given parameters.")
- endpoint = get_inference_endpoint(endpoint_name)
- endpoint.update(repository=repository,
- framework=framework, task=task,
- accelerator=accelerator,
- instance_size=instance_size,
- instance_type=instance_type,
- max_replica=MAX_REPLICA)
- except requests.exceptions.HTTPError as e:
- # Not enough compute, wrong compute, or quota exceeded
- logger.debug("Hit error:")
- logger.debug(e)
- logger.debug("Attempting a different compute.")
- endpoint = update_endpoint_exception(endpoint)
- except Exception as e:
- logger.debug("Hit unaccounted-for error")
- logger.debug(e)
- sys.exit()
- endpoint.fetch()
  logger.info("Endpoint status: %s." % endpoint.status)
  if endpoint.status == 'scaledToZero':
  # Send a request to wake it up.
  get_generation(endpoint.url, "Wake up")
  sleep(TIMEOUT)
- elif endpoint.status == 'failed':
- logger.info("Endpoint failed, attempting to change compute.")
- endpoint = update_endpoint_exception(endpoint)
+ # Applies in ['updating', 'pending', 'initializing']
  wait_for_endpoint(endpoint)
  if endpoint.status == 'failed':
  logger.info("Endpoint failed, attempting to change compute.")
  endpoint = update_endpoint_exception(endpoint)
+ # Applies in ['updating', 'pending', 'initializing']
  wait_for_endpoint(endpoint)
  logger.info("Endpoint created:")
  logger.info(endpoint)
  generation_url = endpoint.url
  if generation_url is None:
- logger.debug("Failed to create an endpoint. Exiting.")
+ logger.error("Failed to create an endpoint. Exiting.")
  sys.exit()
  return generation_url
 
 
  def wait_for_endpoint(endpoint):
+ # TODO: HANDLE 'paused'
  i = 0
- while endpoint.status in ['pending',
- 'initializing']: # not in ['failed', 'running', 'scaledToZero']
+ while endpoint.status in ['updating', 'pending', 'initializing']: # not in ['failed', 'running', 'scaledToZero']
  if i >= 20:
- logger.info("Model failed to respond. Exiting.")
+ logger.error("Model failed to respond. Exiting.")
  sys.exit()
- logger.debug(
+ logger.info(
  "Waiting %d seconds to check again if the endpoint is running." % TIMEOUT)
  sleep(TIMEOUT)
  endpoint.fetch()
- logger.debug("Endpoint status: %s." % (endpoint.status))
+ logger.info("Endpoint status: %s." % (endpoint.status))
  i += 1
 
 
@@ -102,7 +78,7 @@ def update_endpoint_exception(endpoint):
  endpoint.update(instance_size='x4', instance_type='nvidia-a10g',
  max_replica=MAX_REPLICA)
  else:
- logger.info(
+ logger.error(
  "Getting expensive to try to run this model without human oversight. Exiting.")
  sys.exit()
  return endpoint
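
The rewritten create_endpoint swaps the old exception-driven recovery (catching HfHubHTTPError and updating in place) for an explicit look-before-create step: try get_inference_endpoint first, and only call create_inference_endpoint when nothing by that name exists. A condensed sketch of that pattern follows; the helper name reuse_or_create_endpoint, the task/accelerator/vendor values, and the use of huggingface_hub's built-in InferenceEndpoint.wait() in place of the manual wait_for_endpoint loop are illustrative assumptions, not code from this repo.

# Illustrative sketch of the look-before-create pattern used above.
# Assumptions: the compute defaults marked below are guesses, and the repo's
# create_endpoint() polls with its own wait_for_endpoint() rather than .wait().
import requests
from huggingface_hub import create_inference_endpoint, get_inference_endpoint


def reuse_or_create_endpoint(name: str, repository: str) -> str:
    try:
        # Reuse an endpoint with this name if it already exists.
        endpoint = get_inference_endpoint(name)
    except requests.exceptions.HTTPError:
        # Otherwise provision a new one.
        endpoint = create_inference_endpoint(
            name,
            repository=repository,
            framework="pytorch",
            task="text-generation",  # assumed task
            accelerator="gpu",       # assumed accelerator
            vendor="aws",            # assumed vendor
            region="us-east-1",
            type="protected",
            instance_size="x4",
            instance_type="nvidia-l4",
            max_replica=1,
        )
    # Block until the endpoint is running; raises on failure or timeout.
    endpoint.wait(timeout=600)
    return endpoint.url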
src/backend/run_toxicity_eval.py CHANGED
@@ -13,8 +13,8 @@ from statistics import mean
  #TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
  #PERSPECTIVE_API_KEY = os.environ.get("PERSPECTIVE_API_KEY")
  # TODO: I'm sending these constants through from main_backend_toxicity as well, which calls
- # TODO(cont): this file. Choose one or the other?
- from src.envs import TOKEN, PERSPECTIVE_API_KEY, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, API
+ # this file. Choose one or the other?
+ from src.envs import TOKEN, PERSPECTIVE_API_KEY, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, API, EVAL_CUTOFF
  # QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, CACHE_PATH,
  from src.logging import setup_logger
  logger = setup_logger(__name__)
@@ -26,7 +26,6 @@ REQUESTED_ATTRIBUTES = {"TOXICITY": {}, "SEVERE_TOXICITY":{},
  "THREAT":{}}
  ATT_SCORE = "attributeScores"
  SUM_SCORE = "summaryScore"
- DATASET_CUTOFF = 1000
 
  def split_text(generated_text):
  split_text = generated_text.split('\n')
@@ -58,6 +57,7 @@ def get_generation(endpoint_url, pred):
  })
  if output is None:
  return ""
+ # Handling for when it's fallen asleep.
  retries = 0
  while output == {'error': '503 Service Unavailable'}:
  logger.warning("Service unavailable.")
@@ -77,7 +77,7 @@ def get_generation(endpoint_url, pred):
  })
  retries += 1
  else:
- logger.debug(output)
+ logger.info(output)
  generated_text = split_text(output[0]['generated_text'])
  logger.info("Returning text: %s " % generated_text)
  return generated_text
@@ -95,8 +95,8 @@ def call_perspective_api(text, s):
  req_att_scores = {}
  if req_response.ok:
  response = json.loads(req_response.text)
- logger.debug("Perspective API response is:")
- logger.debug(response)
+ logger.info("Perspective API response is:")
+ logger.info(response)
  if ATT_SCORE in response:
  for req_att in REQUESTED_ATTRIBUTES:
  if req_att in response[ATT_SCORE]:
@@ -142,11 +142,11 @@ def score_generations(prompts, generated_responses, total_retries=5):
  logger.info("Mounting, with %d allowable total retries." % total_retries)
  s.mount("http://", HTTPAdapter(max_retries=retries))
 
- for prompt, generated_response in tqdm(zip(prompts[:DATASET_CUTOFF], generated_responses)):
+ for prompt, generated_response in tqdm(zip(prompts[:EVAL_CUTOFF], generated_responses)):
  # Perspective API wants a small QPS, so take some time to iterate over
  # the for loop rather than using a map function with the API.
  try:
- logger.debug("Posting to Perspective API")
+ logger.info("Posting to Perspective API")
  req_att_scores_prompt = call_perspective_api(prompt, s)
  req_att_scores_generated = call_perspective_api(generated_response, s)
  for req_att in REQUESTED_ATTRIBUTES:
@@ -167,16 +167,16 @@ def main(endpoint_url, eval_request):
  prompts = [row['text'] for row in ds['train']['prompt']]
  # All the generated responses from the endpoint
  with Pool() as pool:
- generated_responses = pool.map([get_generation(endpoint_url, x) for x in prompts[:DATASET_CUTOFF]])
+ generated_responses = pool.starmap(get_generation, [(endpoint_url, x) for x in prompts[:EVAL_CUTOFF]])
  att_scores_out = score_generations(prompts, generated_responses)
- logger.debug("Scores are:")
- logger.debug(att_scores_out)
+ logger.info("Scores are:")
+ logger.info(att_scores_out)
  average_att_scores = {}
  # Compute the average, for each toxicity metric.
  for req_att in att_scores_out:
  average_att_scores[req_att.lower()] = mean(att_scores_out[req_att])
- logger.debug("Final scores are:")
- logger.debug(average_att_scores)
+ logger.info("Final scores are:")
+ logger.info(average_att_scores)
 
  results = {"results":{"realtoxicityprompts":{}},
  "config":{"model_dtype":None,"model_name":None,"model_sha":None}}
@@ -192,12 +192,12 @@ def main(endpoint_url, eval_request):
  os.makedirs(os.path.dirname(output_path), exist_ok=True)
  with open(output_path, "w") as f:
  f.write(dumped)
- logger.debug("Results:")
- logger.debug(results)
- logger.debug("Uploading to")
- logger.debug(output_path)
- logger.debug("repo id")
- logger.debug(RESULTS_REPO)
+ logger.info("Results:")
+ logger.info(results)
+ logger.info("Uploading to")
+ logger.info(output_path)
+ logger.info("repo id")
+ logger.info(RESULTS_REPO)
 
  API.upload_file(
  path_or_fileobj=output_path,
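
The parallelism half of the commit is the pool.starmap change in main(): the old code evaluated get_generation eagerly inside a list comprehension (and passed pool.map only that list, no function), so nothing actually ran in the worker pool. starmap takes the function plus an iterable of argument tuples and unpacks each tuple into a separate worker call. A minimal, self-contained illustration (fake_get_generation is a stand-in for the real endpoint call, not repo code):

# Each (endpoint_url, prompt) tuple is unpacked into a separate worker call,
# so the endpoint requests run in parallel instead of serially in the parent.
from multiprocessing import Pool


def fake_get_generation(endpoint_url: str, prompt: str) -> str:
    # Stand-in for the real get_generation(endpoint_url, pred).
    return f"{endpoint_url} -> {prompt}"


if __name__ == "__main__":
    prompts = ["a", "b", "c"]
    with Pool() as pool:
        results = pool.starmap(
            fake_get_generation,
            [("https://endpoint.example", p) for p in prompts],
        )
    print(results)  # ['https://endpoint.example -> a', ...]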
src/envs.py CHANGED
@@ -2,16 +2,14 @@ import os
 
  from huggingface_hub import HfApi
 
- # Info to change for your repository
  # ----------------------------------
  TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
  PERSPECTIVE_API_KEY = os.environ.get("PERSPECTIVE_API_KEY")
 
- OWNER = "meg" # Change to your org - don't forget to create a results and request dataset
+ OWNER = "meg"
 
- # For harness evaluations
  DEVICE = "cuda:0" #if you add compute, for harness evaluations
- LIMIT = None #10 # !!!! For testing, should be None for actual evaluations!!!
+ EVAL_CUTOFF = 10 # !!!! For testing, should be None for actual evaluations!!!
  NUM_FEWSHOT = 0 # Change with your few shot for the Harness evaluations
  TASKS_HARNESS = ["realtoxicityprompts"]#, "toxigen", "logiqa"]
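
EVAL_CUTOFF replaces the old LIMIT constant and is consumed as a slice bound (prompts[:EVAL_CUTOFF]) in run_toxicity_eval.py, so setting it back to None evaluates the full prompt set: a slice with a None bound simply takes everything. A tiny illustration:

# EVAL_CUTOFF is used as a slice bound, so None means "no cutoff".
prompts = ["p0", "p1", "p2", "p3"]
print(prompts[:2])     # ['p0', 'p1'] -- truncated test run
print(prompts[:None])  # ['p0', 'p1', 'p2', 'p3'] -- full evaluation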
 
src/logging.py CHANGED
@@ -1,4 +1,3 @@
- import sys
  from pathlib import Path
 
  proj_dir = Path(__file__).parents[1]