File size: 3,063 Bytes
6aaff3e 73caaf7 6aaff3e 32e1a9b 132e085 02eb851 32e1a9b 02eb851 32e1a9b 02eb851 32e1a9b 02eb851 73caaf7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
export const CREATE_VIEWS_QUERY = `
CREATE VIEW models AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true');
CREATE VIEW datasets AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/datasets/train/0000.parquet?download=true');
CREATE VIEW spaces AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/spaces/train/0000.parquet?download=true');
`
export const FETCH_CHART_DATA_QUERY = `
WITH all_data AS (
SELECT DATE_TRUNC('month', CAST(createdAt AS DATE)) AS month, 'model' AS type FROM models
UNION ALL
SELECT DATE_TRUNC('month', CAST(createdAt AS DATE)) AS month, 'dataset' AS type FROM datasets
UNION ALL
SELECT DATE_TRUNC('month', CAST(createdAt AS DATE)) AS month, 'space' AS type FROM spaces
)
SELECT
month,
COUNT(*) FILTER (WHERE type = 'model') AS models,
COUNT(*) FILTER (WHERE type = 'dataset') AS datasets,
COUNT(*) FILTER (WHERE type = 'space') AS spaces
FROM all_data
WHERE month < DATE_TRUNC('month', CURRENT_DATE)
GROUP BY month
ORDER BY month
`
export const FETCH_MODEL_LICENSE_DATA_QUERY = `
SELECT tag, COUNT(*) as count
FROM models, UNNEST(tags) AS t(tag)
WHERE tag LIKE 'license:%'
GROUP BY tag;
`
export const FETCH_DATASET_LICENSE_DATA_QUERY = `
SELECT tag, COUNT(*) as count
FROM datasets, UNNEST(tags) AS t(tag)
WHERE tag LIKE 'license:%'
GROUP BY tag;
`
export const FETCH_SPACE_SDK_DATA_QUERY = `
SELECT sdk, COUNT(*) as count
FROM spaces
GROUP BY sdk;
`
export const FETCH_FINETUNE_MODEL_GROWTH_QUERY = (baseModel: string) => `
WITH RECURSIVE month_series AS (
SELECT DATE_TRUNC('month', MIN(CAST(createdAt AS TIMESTAMP))) - INTERVAL 1 MONTH AS month
FROM models
WHERE EXISTS (
SELECT 1 FROM UNNEST(tags) AS t(tag)
WHERE tag ILIKE 'base_model:%${baseModel}'
)
UNION ALL
SELECT month + INTERVAL 1 MONTH
FROM month_series
WHERE month < DATE_TRUNC('month', CURRENT_DATE)
),
finetuned_models AS (
SELECT DISTINCT id, DATE_TRUNC('month', CAST(createdAt AS TIMESTAMP)) AS creation_month
FROM models
WHERE EXISTS (
SELECT 1 FROM UNNEST(tags) AS t(tag)
WHERE tag ILIKE 'base_model:%${baseModel}'
)
)
SELECT
strftime(ms.month, '%Y-%m') as date,
COALESCE(SUM(COUNT(DISTINCT fm.id)) OVER (ORDER BY ms.month), 0) AS count
FROM month_series ms
LEFT JOIN finetuned_models fm ON ms.month = fm.creation_month
GROUP BY ms.month
ORDER BY ms.month
`
export const FETCH_TOP_BASE_MODELS_TABLE_QUERY = `
WITH base_models AS (
SELECT DISTINCT id,
REGEXP_REPLACE(SUBSTRING(tag, 12), '^(finetune:|adapter:)', '') AS model
FROM models, UNNEST(tags) AS t(tag)
WHERE tag ILIKE 'base_model:%'
)
SELECT
model,
COUNT(DISTINCT id) AS finetunes
FROM base_models
GROUP BY model
ORDER BY finetunes DESC
LIMIT 10
` |