|
export const CREATE_VIEWS_QUERY = ` |
|
CREATE VIEW models AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true'); |
|
CREATE VIEW datasets AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/datasets/train/0000.parquet?download=true'); |
|
CREATE VIEW spaces AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/spaces/train/0000.parquet?download=true'); |
|
` |
|
|
|
export const FETCH_CHART_DATA_QUERY = ` |
|
WITH all_data AS ( |
|
SELECT DATE_TRUNC('month', CAST(createdAt AS DATE)) AS month, 'model' AS type FROM models |
|
UNION ALL |
|
SELECT DATE_TRUNC('month', CAST(createdAt AS DATE)) AS month, 'dataset' AS type FROM datasets |
|
UNION ALL |
|
SELECT DATE_TRUNC('month', CAST(createdAt AS DATE)) AS month, 'space' AS type FROM spaces |
|
) |
|
SELECT |
|
month, |
|
COUNT(*) FILTER (WHERE type = 'model') AS models, |
|
COUNT(*) FILTER (WHERE type = 'dataset') AS datasets, |
|
COUNT(*) FILTER (WHERE type = 'space') AS spaces |
|
FROM all_data |
|
WHERE month < DATE_TRUNC('month', CURRENT_DATE) |
|
GROUP BY month |
|
ORDER BY month |
|
` |
|
|
|
export const FETCH_MODEL_LICENSE_DATA_QUERY = ` |
|
SELECT tag, COUNT(*) as count |
|
FROM models, UNNEST(tags) AS t(tag) |
|
WHERE tag LIKE 'license:%' |
|
GROUP BY tag; |
|
` |
|
|
|
export const FETCH_DATASET_LICENSE_DATA_QUERY = ` |
|
SELECT tag, COUNT(*) as count |
|
FROM datasets, UNNEST(tags) AS t(tag) |
|
WHERE tag LIKE 'license:%' |
|
GROUP BY tag; |
|
` |
|
|
|
export const FETCH_SPACE_SDK_DATA_QUERY = ` |
|
SELECT sdk, COUNT(*) as count |
|
FROM spaces |
|
GROUP BY sdk; |
|
` |
|
|
|
export const FETCH_FINETUNE_MODEL_GROWTH_QUERY = (baseModel: string) => ` |
|
WITH RECURSIVE month_series AS ( |
|
SELECT DATE_TRUNC('month', MIN(CAST(createdAt AS TIMESTAMP))) - INTERVAL 1 MONTH AS month |
|
FROM models |
|
WHERE EXISTS ( |
|
SELECT 1 FROM UNNEST(tags) AS t(tag) |
|
WHERE tag ILIKE 'base_model:%${baseModel}' |
|
) |
|
|
|
UNION ALL |
|
|
|
SELECT month + INTERVAL 1 MONTH |
|
FROM month_series |
|
WHERE month < DATE_TRUNC('month', CURRENT_DATE) |
|
), |
|
finetuned_models AS ( |
|
SELECT DISTINCT id, DATE_TRUNC('month', CAST(createdAt AS TIMESTAMP)) AS creation_month |
|
FROM models |
|
WHERE EXISTS ( |
|
SELECT 1 FROM UNNEST(tags) AS t(tag) |
|
WHERE tag ILIKE 'base_model:%${baseModel}' |
|
) |
|
) |
|
SELECT |
|
strftime(ms.month, '%Y-%m') as date, |
|
COALESCE(SUM(COUNT(DISTINCT fm.id)) OVER (ORDER BY ms.month), 0) AS count |
|
FROM month_series ms |
|
LEFT JOIN finetuned_models fm ON ms.month = fm.creation_month |
|
GROUP BY ms.month |
|
ORDER BY ms.month |
|
` |
|
|
|
export const FETCH_TOP_BASE_MODELS_TABLE_QUERY = ` |
|
WITH base_models AS ( |
|
SELECT DISTINCT id, |
|
REGEXP_REPLACE(SUBSTRING(tag, 12), '^(finetune:|adapter:)', '') AS model |
|
FROM models, UNNEST(tags) AS t(tag) |
|
WHERE tag ILIKE 'base_model:%' |
|
) |
|
SELECT |
|
model, |
|
COUNT(DISTINCT id) AS finetunes |
|
FROM base_models |
|
GROUP BY model |
|
ORDER BY finetunes DESC |
|
LIMIT 10 |
|
` |