albertvillanova HF staff committed on
Commit
719c272
1 Parent(s): ca293bc

Make the code robust against HTTP errors

Browse files
Files changed (4) hide show
  1. src/details.py +3 -0
  2. src/hub.py +22 -1
  3. src/model_tree.py +5 -1
  4. src/results.py +3 -0
src/details.py CHANGED
@@ -55,6 +55,8 @@ async def load_details_dataframe(model_id, subtask):
55
  return
56
  path = max(paths)
57
  data = await load_jsonlines_file(path)
 
 
58
  df = pd.json_normalize(data)
59
  # Keep model_name:
60
  df["model_name"] = model_id
@@ -70,6 +72,7 @@ async def load_details(subtask, *model_ids_lists):
70
  for model_id in model_ids
71
  ]
72
  )
 
73
  if dfs:
74
  return pd.concat(dfs)
75
 
 
55
  return
56
  path = max(paths)
57
  data = await load_jsonlines_file(path)
58
+ if not data:
59
+ return
60
  df = pd.json_normalize(data)
61
  # Keep model_name:
62
  df["model_name"] = model_id
 
72
  for model_id in model_ids
73
  ]
74
  )
75
+ dfs = [df for df in dfs if df is not None]
76
  if dfs:
77
  return pd.concat(dfs)
78
 
src/hub.py CHANGED
@@ -8,7 +8,20 @@ from huggingface_hub.utils import build_hf_headers
8
  import src.constants as constants
9
 
10
 
11
- client = httpx.AsyncClient(follow_redirects=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  fs = HfFileSystem()
13
 
14
 
@@ -20,12 +33,16 @@ def glob(path):
20
  async def load_json_file(path):
21
  url = to_url(path)
22
  r = await client.get(url)
 
 
23
  return r.json()
24
 
25
 
26
  async def load_jsonlines_file(path):
27
  url = to_url(path)
28
  r = await client.get(url, headers=build_hf_headers())
 
 
29
  f = io.StringIO(r.text)
30
  return [json.loads(line) for line in f]
31
 
@@ -39,6 +56,8 @@ def to_url(path):
39
  async def load_model_card(model_id):
40
  url = to_url(f"{model_id}/README.md")
41
  r = await client.get(url)
 
 
42
  return ModelCard(r.text, ignore_metadata_errors=True)
43
 
44
 
@@ -47,4 +66,6 @@ async def list_models(filtering=None):
47
  if filtering:
48
  params["filter"] = filtering
49
  r = await client.get(f"{constants.HF_API_URL}/models", params=params)
 
 
50
  return r.json()
 
8
  import src.constants as constants
9
 
10
 
11
+ class Client:
12
+ def __init__(self):
13
+ self.client = httpx.AsyncClient(follow_redirects=True)
14
+
15
+ async def get(self, url, headers=None, params=None):
16
+ try:
17
+ r = await self.client.get(url, headers=headers, params=params)
18
+ r.raise_for_status()
19
+ except httpx.HTTPError:
20
+ return
21
+ return r
22
+
23
+
24
+ client = Client()
25
  fs = HfFileSystem()
26
 
27
 
 
33
  async def load_json_file(path):
34
  url = to_url(path)
35
  r = await client.get(url)
36
+ if r is None:
37
+ return
38
  return r.json()
39
 
40
 
41
  async def load_jsonlines_file(path):
42
  url = to_url(path)
43
  r = await client.get(url, headers=build_hf_headers())
44
+ if r is None:
45
+ return
46
  f = io.StringIO(r.text)
47
  return [json.loads(line) for line in f]
48
 
 
56
  async def load_model_card(model_id):
57
  url = to_url(f"{model_id}/README.md")
58
  r = await client.get(url)
59
+ if r is None:
60
+ return
61
  return ModelCard(r.text, ignore_metadata_errors=True)
62
 
63
 
 
66
  if filtering:
67
  params["filter"] = filtering
68
  r = await client.get(f"{constants.HF_API_URL}/models", params=params)
69
+ if r is None:
70
+ return
71
  return r.json()
src/model_tree.py CHANGED
@@ -30,13 +30,17 @@ async def load_model_tree(result_paths_per_model, model_ids):
30
 
31
  async def load_base_models(model_id) -> list[str]:
32
  card = await load_model_card(model_id)
 
 
33
  base_models = getattr(card.data, constants.BASE_MODEL_TYPE[1])
34
  if not isinstance(base_models, list):
35
  base_models = [base_models]
36
  return base_models
37
 
38
 
39
- async def load_derived_models_by_type(model_id, derived_model_type):
40
  models = await list_models(filtering=f"base_model:{derived_model_type}:{model_id}")
 
 
41
  models = [model["id"] for model in models]
42
  return models
 
30
 
31
  async def load_base_models(model_id) -> list[str]:
32
  card = await load_model_card(model_id)
33
+ if not card:
34
+ return []
35
  base_models = getattr(card.data, constants.BASE_MODEL_TYPE[1])
36
  if not isinstance(base_models, list):
37
  base_models = [base_models]
38
  return base_models
39
 
40
 
41
+ async def load_derived_models_by_type(model_id, derived_model_type) -> list[str]:
42
  models = await list_models(filtering=f"base_model:{derived_model_type}:{model_id}")
43
+ if not models:
44
+ return []
45
  models = [model["id"] for model in models]
46
  return models
src/results.py CHANGED
@@ -34,6 +34,9 @@ async def load_results_dataframe(model_id, result_paths_per_model=None):
34
  return
35
  result_paths = result_paths_per_model[model_id]
36
  results = await asyncio.gather(*[load_json_file(path) for path in result_paths])
 
 
 
37
  data = {"results": {}, "configs": {}}
38
  for result in results:
39
  data["results"].update(result["results"])
 
34
  return
35
  result_paths = result_paths_per_model[model_id]
36
  results = await asyncio.gather(*[load_json_file(path) for path in result_paths])
37
+ results = [result for result in results if result]
38
+ if not results:
39
+ return
40
  data = {"results": {}, "configs": {}}
41
  for result in results:
42
  data["results"].update(result["results"])