import os from pathlib import Path import gradio as gr from huggingface_hub import WebhookPayload, WebhooksServer from src.my_logger import setup_logger from src.utilities import load_datasets, merge_and_update_datasets from src.visualize_logs import log_file_to_html_string from src.build_nomic import build_nomic proj_dir = Path(__name__).parent logger = setup_logger(__name__) logger.info("Starting Application...") SUBREDDIT = os.environ["SUBREDDIT"] USERNAME = os.environ["USERNAME"] OG_DATASET = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}" PROCESSED_DATASET = os.environ['PROCESSED_DATASET'] HUGGINGFACE_AUTH_TOKEN = os.environ["HUGGINGFACE_AUTH_TOKEN"] WEBHOOK_SECRET = os.getenv("HF_WEBHOOK_SECRET", 'secret') intro_md = """ # Processing BORU This space is triggered by a webhook for changes on [derek-thomas/dataset-creator-reddit-bestofredditorupdates](https://huggingface.co/datasets/derek-thomas/dataset-creator-reddit-bestofredditorupdates). It then takes the updates from that dataset and get embeddings and puts the results in [https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed](https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed) Check out the original on [Nomic](https://atlas.nomic.ai/data/derek2/boru-subreddit-neural-search/map) """ html_str = """