derek-thomas HF staff committed on
Commit
d9a1859
1 Parent(s): 65f2fab

Updating Embeddings space

Browse files
Files changed (1) hide show
  1. src/build_nomic.py +16 -0
src/build_nomic.py CHANGED
@@ -4,10 +4,14 @@ import pandas as pd
4
 
5
  import nomic
6
  from nomic import atlas
 
7
  import numpy as np
8
 
 
 
9
  NOMIC_KEY = os.getenv('NOMIC_KEY')
10
  nomic.login(NOMIC_KEY)
 
11
 
12
 
13
  def count_words(text):
@@ -37,9 +41,21 @@ def build_nomic(dataset):
37
 
38
  df['word_count'] = df['content'].apply(count_words)
39
 
 
 
 
 
 
 
 
 
 
 
40
  # Create Atlas project
 
41
  project = atlas.map_data(embeddings=np.stack(df['embedding'].values),
42
  data=df[non_embedding_columns].to_dict(orient='records'),
43
  id_field='id',
44
  identifier='BORU Subreddit Neural Search',
45
  )
 
 
4
 
5
  import nomic
6
  from nomic import atlas
7
+ from nomic.dataset import AtlasClass
8
  import numpy as np
9
 
10
+ from src.my_logger import setup_logger
11
+
12
  NOMIC_KEY = os.getenv('NOMIC_KEY')
13
  nomic.login(NOMIC_KEY)
14
+ logger = setup_logger(__name__)
15
 
16
 
17
  def count_words(text):
 
41
 
42
  df['word_count'] = df['content'].apply(count_words)
43
 
44
+
45
+ logger.info(f"Trying to delete old version of nomic Atlas...")
46
+ try:
47
+ ac = AtlasClass()
48
+ atlas_id = ac._get_dataset_by_slug_identifier("derek2/boru-subreddit-neural-search")['id']
49
+ ac._delete_project_by_id(atlas_id)
50
+ logger.info(f"Succeeded in deleting old version of nomic Atlas.")
51
+ except:
52
+ logger.info(f"Failed to delete old version of nomic Atlas.")
53
+
54
  # Create Atlas project
55
+ logger.info(f"Trying to create new version of Atlas...")
56
  project = atlas.map_data(embeddings=np.stack(df['embedding'].values),
57
  data=df[non_embedding_columns].to_dict(orient='records'),
58
  id_field='id',
59
  identifier='BORU Subreddit Neural Search',
60
  )
61
+ logger.info(f"Succeeded in creating new version of nomic Atlas.")