File size: 3,063 Bytes
6aaff3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73caaf7
6aaff3e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32e1a9b
 
 
132e085
02eb851
 
 
 
 
32e1a9b
 
 
 
 
 
 
 
02eb851
 
 
 
 
 
32e1a9b
 
 
02eb851
32e1a9b
 
 
 
 
02eb851
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73caaf7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
export const CREATE_VIEWS_QUERY = `
  CREATE VIEW models AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/models/train/0000.parquet?download=true');
  CREATE VIEW datasets AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/datasets/train/0000.parquet?download=true');
  CREATE VIEW spaces AS SELECT * FROM read_parquet('https://huggingface.co/datasets/cfahlgren1/hub-stats/resolve/refs%2Fconvert%2Fparquet/spaces/train/0000.parquet?download=true');
`

export const FETCH_CHART_DATA_QUERY = `
  WITH all_data AS (
    SELECT DATE_TRUNC('month', CAST(createdAt AS DATE)) AS month, 'model' AS type FROM models
    UNION ALL
    SELECT DATE_TRUNC('month', CAST(createdAt AS DATE)) AS month, 'dataset' AS type FROM datasets
    UNION ALL
    SELECT DATE_TRUNC('month', CAST(createdAt AS DATE)) AS month, 'space' AS type FROM spaces
  )
  SELECT
    month,
    COUNT(*) FILTER (WHERE type = 'model') AS models,
    COUNT(*) FILTER (WHERE type = 'dataset') AS datasets,
    COUNT(*) FILTER (WHERE type = 'space') AS spaces
  FROM all_data
  WHERE month < DATE_TRUNC('month', CURRENT_DATE)
  GROUP BY month
  ORDER BY month
`

export const FETCH_MODEL_LICENSE_DATA_QUERY = `
  SELECT tag, COUNT(*) as count
  FROM models, UNNEST(tags) AS t(tag)
  WHERE tag LIKE 'license:%'
  GROUP BY tag;
`

export const FETCH_DATASET_LICENSE_DATA_QUERY = `
  SELECT tag, COUNT(*) as count
  FROM datasets, UNNEST(tags) AS t(tag)
  WHERE tag LIKE 'license:%'
  GROUP BY tag;
`

export const FETCH_SPACE_SDK_DATA_QUERY = `
  SELECT sdk, COUNT(*) as count
  FROM spaces
  GROUP BY sdk;
`

export const FETCH_FINETUNE_MODEL_GROWTH_QUERY = (baseModel: string) => `
  WITH RECURSIVE month_series AS (
    SELECT DATE_TRUNC('month', MIN(CAST(createdAt AS TIMESTAMP))) - INTERVAL 1 MONTH AS month
    FROM models
    WHERE EXISTS (
      SELECT 1 FROM UNNEST(tags) AS t(tag)
      WHERE tag ILIKE 'base_model:%${baseModel}'
    )

    UNION ALL

    SELECT month + INTERVAL 1 MONTH
    FROM month_series
    WHERE month < DATE_TRUNC('month', CURRENT_DATE)
  ),
  finetuned_models AS (
    SELECT DISTINCT id, DATE_TRUNC('month', CAST(createdAt AS TIMESTAMP)) AS creation_month
    FROM models
    WHERE EXISTS (
      SELECT 1 FROM UNNEST(tags) AS t(tag)
      WHERE tag ILIKE 'base_model:%${baseModel}'
    )
  )
  SELECT
    strftime(ms.month, '%Y-%m') as date,
    COALESCE(SUM(COUNT(DISTINCT fm.id)) OVER (ORDER BY ms.month), 0) AS count
  FROM month_series ms
  LEFT JOIN finetuned_models fm ON ms.month = fm.creation_month
  GROUP BY ms.month
  ORDER BY ms.month
`

export const FETCH_TOP_BASE_MODELS_TABLE_QUERY = `
  WITH base_models AS (
    SELECT DISTINCT id,
      REGEXP_REPLACE(SUBSTRING(tag, 12), '^(finetune:|adapter:)', '') AS model
    FROM models, UNNEST(tags) AS t(tag)
    WHERE tag ILIKE 'base_model:%'
  )
  SELECT
    model,
    COUNT(DISTINCT id) AS finetunes
  FROM base_models
  GROUP BY model
  ORDER BY finetunes DESC
  LIMIT 10
`