Commit
•
719c272
1
Parent(s):
ca293bc
Make the code robust against HTTP errors
Browse files
- src/details.py +3 -0
- src/hub.py +22 -1
- src/model_tree.py +5 -1
- src/results.py +3 -0
src/details.py
CHANGED
@@ -55,6 +55,8 @@ async def load_details_dataframe(model_id, subtask):
|
|
55 |
return
|
56 |
path = max(paths)
|
57 |
data = await load_jsonlines_file(path)
|
|
|
|
|
58 |
df = pd.json_normalize(data)
|
59 |
# Keep model_name:
|
60 |
df["model_name"] = model_id
|
@@ -70,6 +72,7 @@ async def load_details(subtask, *model_ids_lists):
|
|
70 |
for model_id in model_ids
|
71 |
]
|
72 |
)
|
|
|
73 |
if dfs:
|
74 |
return pd.concat(dfs)
|
75 |
|
|
|
55 |
return
|
56 |
path = max(paths)
|
57 |
data = await load_jsonlines_file(path)
|
58 |
+
if not data:
|
59 |
+
return
|
60 |
df = pd.json_normalize(data)
|
61 |
# Keep model_name:
|
62 |
df["model_name"] = model_id
|
|
|
72 |
for model_id in model_ids
|
73 |
]
|
74 |
)
|
75 |
+
dfs = [df for df in dfs if df is not None]
|
76 |
if dfs:
|
77 |
return pd.concat(dfs)
|
78 |
|
src/hub.py
CHANGED
@@ -8,7 +8,20 @@ from huggingface_hub.utils import build_hf_headers
|
|
8 |
import src.constants as constants
|
9 |
|
10 |
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
fs = HfFileSystem()
|
13 |
|
14 |
|
@@ -20,12 +33,16 @@ def glob(path):
|
|
20 |
async def load_json_file(path):
|
21 |
url = to_url(path)
|
22 |
r = await client.get(url)
|
|
|
|
|
23 |
return r.json()
|
24 |
|
25 |
|
26 |
async def load_jsonlines_file(path):
|
27 |
url = to_url(path)
|
28 |
r = await client.get(url, headers=build_hf_headers())
|
|
|
|
|
29 |
f = io.StringIO(r.text)
|
30 |
return [json.loads(line) for line in f]
|
31 |
|
@@ -39,6 +56,8 @@ def to_url(path):
|
|
39 |
async def load_model_card(model_id):
|
40 |
url = to_url(f"{model_id}/README.md")
|
41 |
r = await client.get(url)
|
|
|
|
|
42 |
return ModelCard(r.text, ignore_metadata_errors=True)
|
43 |
|
44 |
|
@@ -47,4 +66,6 @@ async def list_models(filtering=None):
|
|
47 |
if filtering:
|
48 |
params["filter"] = filtering
|
49 |
r = await client.get(f"{constants.HF_API_URL}/models", params=params)
|
|
|
|
|
50 |
return r.json()
|
|
|
8 |
import src.constants as constants
|
9 |
|
10 |
|
11 |
+
class Client:
    """Wrapper around ``httpx.AsyncClient`` whose ``get`` reports failure as ``None``.

    Callers check the return value for ``None`` instead of handling
    ``httpx.HTTPError`` themselves.
    """

    def __init__(self):
        # Redirects are followed so callers receive the final response.
        self.client = httpx.AsyncClient(follow_redirects=True)

    async def get(self, url, headers=None, params=None):
        """Send a GET request and return the response, or ``None`` on failure.

        Args:
            url: Address to request.
            headers: Optional request headers, forwarded to the underlying client.
            params: Optional query parameters, forwarded to the underlying client.

        Returns:
            The response when the request succeeded and ``raise_for_status()``
            did not raise; ``None`` when ``httpx.HTTPError`` was raised by
            either step.
        """
        try:
            r = await self.client.get(url, headers=headers, params=params)
            r.raise_for_status()
        except httpx.HTTPError as err:
            # Function-scope import: the file's import block is outside this view.
            import logging

            # Best-effort contract is kept (None on failure), but the failure is
            # recorded so it can be told apart from a legitimately absent file.
            logging.getLogger(__name__).warning("GET %s failed: %r", url, err)
            return None
        return r
|
22 |
+
|
23 |
+
|
24 |
+
client = Client()
|
25 |
fs = HfFileSystem()
|
26 |
|
27 |
|
|
|
33 |
async def load_json_file(path):
    """Request the URL derived from *path* and return its decoded JSON body.

    Returns ``None`` when the client produced no response.
    """
    response = await client.get(to_url(path))
    return None if response is None else response.json()
|
39 |
|
40 |
|
41 |
async def load_jsonlines_file(path):
    """Request the URL derived from *path* and parse its body as JSON Lines.

    The request is sent with the headers produced by ``build_hf_headers()``.

    Returns:
        A list with one decoded object per non-blank line of the response
        text, or ``None`` when the client produced no response.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    url = to_url(path)
    r = await client.get(url, headers=build_hf_headers())
    if r is None:
        return None
    # Lines are taken from StringIO (split on "\n" only); blank or
    # whitespace-only lines are skipped because json.loads would reject them.
    return [json.loads(line) for line in io.StringIO(r.text) if line.strip()]
|
48 |
|
|
|
56 |
async def load_model_card(model_id):
    """Request ``{model_id}/README.md`` and wrap its text in a ``ModelCard``.

    The card is built with ``ignore_metadata_errors=True``.
    Returns ``None`` when the client produced no response.
    """
    readme_url = to_url(f"{model_id}/README.md")
    response = await client.get(readme_url)
    if response is not None:
        return ModelCard(response.text, ignore_metadata_errors=True)
    return None
|
62 |
|
63 |
|
|
|
66 |
if filtering:
|
67 |
params["filter"] = filtering
|
68 |
r = await client.get(f"{constants.HF_API_URL}/models", params=params)
|
69 |
+
if r is None:
|
70 |
+
return
|
71 |
return r.json()
|
src/model_tree.py
CHANGED
@@ -30,13 +30,17 @@ async def load_model_tree(result_paths_per_model, model_ids):
|
|
30 |
|
31 |
async def load_base_models(model_id) -> list[str]:
|
32 |
card = await load_model_card(model_id)
|
|
|
|
|
33 |
base_models = getattr(card.data, constants.BASE_MODEL_TYPE[1])
|
34 |
if not isinstance(base_models, list):
|
35 |
base_models = [base_models]
|
36 |
return base_models
|
37 |
|
38 |
|
39 |
-
async def load_derived_models_by_type(model_id, derived_model_type):
|
40 |
models = await list_models(filtering=f"base_model:{derived_model_type}:{model_id}")
|
|
|
|
|
41 |
models = [model["id"] for model in models]
|
42 |
return models
|
|
|
30 |
|
31 |
async def load_base_models(model_id) -> list[str]:
    """Return the base models recorded in the model card of *model_id*.

    The value is read from the card-data attribute named by
    ``constants.BASE_MODEL_TYPE[1]``.

    Returns:
        A list of base models. A single non-list value is wrapped in a list.
        The list is empty when the card could not be loaded, or when the
        attribute is missing or ``None``.
    """
    card = await load_model_card(model_id)
    if not card:
        return []
    # Default to None so card data without this attribute does not raise.
    base_models = getattr(card.data, constants.BASE_MODEL_TYPE[1], None)
    if base_models is None:
        # Avoid returning [None], which would contradict the list[str] contract.
        return []
    if not isinstance(base_models, list):
        base_models = [base_models]
    return base_models
|
39 |
|
40 |
|
41 |
+
async def load_derived_models_by_type(model_id, derived_model_type) -> list[str]:
    """Return the ids of models listed under a ``base_model`` filter.

    The filter passed to ``list_models`` is
    ``base_model:{derived_model_type}:{model_id}``. An empty list is returned
    when ``list_models`` yields nothing (``None`` or an empty result).
    """
    hub_filter = f"base_model:{derived_model_type}:{model_id}"
    found = await list_models(filtering=hub_filter)
    return [entry["id"] for entry in found] if found else []
|
src/results.py
CHANGED
@@ -34,6 +34,9 @@ async def load_results_dataframe(model_id, result_paths_per_model=None):
|
|
34 |
return
|
35 |
result_paths = result_paths_per_model[model_id]
|
36 |
results = await asyncio.gather(*[load_json_file(path) for path in result_paths])
|
|
|
|
|
|
|
37 |
data = {"results": {}, "configs": {}}
|
38 |
for result in results:
|
39 |
data["results"].update(result["results"])
|
|
|
34 |
return
|
35 |
result_paths = result_paths_per_model[model_id]
|
36 |
results = await asyncio.gather(*[load_json_file(path) for path in result_paths])
|
37 |
+
results = [result for result in results if result]
|
38 |
+
if not results:
|
39 |
+
return
|
40 |
data = {"results": {}, "configs": {}}
|
41 |
for result in results:
|
42 |
data["results"].update(result["results"])
|