import os from pathlib import Path import gradio as gr from huggingface_hub import WebhookPayload, WebhooksServer from src.my_logger import setup_logger from src.utilities import load_datasets, merge_and_update_datasets from src.visualize_logs import log_file_to_html_string from src.build_nomic import build_nomic from src.readme_update import update_dataset_readme proj_dir = Path(__name__).parent logger = setup_logger(__name__) logger.info("Starting Application...") SUBREDDIT = os.environ["SUBREDDIT"] USERNAME = os.environ["USERNAME"] OG_DATASET = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}" PROCESSED_DATASET = os.environ['PROCESSED_DATASET'] HUGGINGFACE_AUTH_TOKEN = os.environ["HUGGINGFACE_AUTH_TOKEN"] WEBHOOK_SECRET = os.getenv("HF_WEBHOOK_SECRET", 'secret') intro_md = """ # Processing BORU ## Creation Details This space is triggered by a webhook for changes on [derek-thomas/dataset-creator-reddit-bestofredditorupdates](https://huggingface.co/datasets/derek-thomas/dataset-creator-reddit-bestofredditorupdates). It then takes the updates from that dataset and get embeddings and puts the results in [https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed](https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed) Check out the original on [Nomic](https://atlas.nomic.ai/data/derek2/boru-subreddit-neural-search/map) ## What is this for beginners? This is a space to visually search the subreddit /r/bestofredditorupdates. Have you ever been curious to search for stories that are similar to one of your favorites? This can help! - Each dot represents a post (try clicking on one) - Closer dots are similar in topic - Use the filters on the left to help you narrow down what you are looking for - The lasso can help you search in a smaller range that you drag with your mouse - The filter can help you narrow by field, - Filtering posts that are `CONCLUDED` - Filtering popular posts - Filtering by date - The search can help you look by keyword ## Todo - Ignore the colors for now, I need to clean that up :) - I need to integrate with Nomic's semantic search """ url = "https://atlas.nomic.ai/data/derek2/boru-subreddit-neural-search/map/cdd8c890-2fac-4ea6-91f8-e6821203cfcb" html_str = f'