"""Gradio Space that reprocesses a dataset when a Hugging Face webhook fires.

The module builds a small Gradio UI that shows the application log, wraps it
in a ``WebhooksServer``, and registers one webhook (``/dataset_repo``). When a
repo-scoped event arrives, the datasets are loaded, merged and pushed to
``PROCESSED_DATASET`` on the Hugging Face Hub.

Required environment variables (a missing one raises ``KeyError`` at import):
``SUBREDDIT``, ``USERNAME``, ``PROCESSED_DATASET``, ``HUGGINGFACE_AUTH_TOKEN``.
Optional: ``HF_WEBHOOK_SECRET``.
"""
import os
from pathlib import Path

import gradio as gr
from huggingface_hub import WebhookPayload, WebhooksServer

from src.utilities import load_datasets, merge_and_update_datasets
from src.my_logger import setup_logger
from src.visualize_logs import log_file_to_html_string

# `__file__`, not `__name__`: the latter is the module name (e.g. "__main__"),
# so its "parent" would always be "." regardless of where this file lives.
proj_dir = Path(__file__).parent
logger = setup_logger(__name__)

SUBREDDIT = os.environ["SUBREDDIT"]
USERNAME = os.environ["USERNAME"]
OG_DATASET = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}"
PROCESSED_DATASET = os.environ['PROCESSED_DATASET']
HUGGINGFACE_AUTH_TOKEN = os.environ["HUGGINGFACE_AUTH_TOKEN"]

# The fallback value is kept for backward compatibility, but it is trivially
# guessable, so make its use visible in the logs.
_DEFAULT_WEBHOOK_SECRET = 'secret'
WEBHOOK_SECRET = os.getenv("HF_WEBHOOK_SECRET", _DEFAULT_WEBHOOK_SECRET)
if WEBHOOK_SECRET == _DEFAULT_WEBHOOK_SECRET:
    logger.warning(
        "HF_WEBHOOK_SECRET is unset or left at its default value; "
        "set a strong secret to protect the webhook endpoint"
    )

intro_md = """
# Processing BORU
This space is triggered by a webhook for changes on
[derek-thomas/dataset-creator-reddit-bestofredditorupdates](https://huggingface.co/datasets/derek-thomas/dataset-creator-reddit-bestofredditorupdates).
It then takes the updates from that dataset and get embeddings and puts the results in
[https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed](https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed)
"""

with gr.Blocks() as ui:
    with gr.Tab("Application"):
        gr.Markdown(intro_md)
        # Re-render the log view every second from the log file.
        output = gr.HTML(log_file_to_html_string, every=1)

app = WebhooksServer(ui=ui.queue(), webhook_secret=WEBHOOK_SECRET)


@app.add_webhook("/dataset_repo")
async def community(payload: WebhookPayload) -> None:
    """Reprocess and republish the dataset in response to a repo-scoped webhook.

    Events whose scope does not start with ``"repo"`` are ignored. Otherwise
    the datasets are loaded, merged, and the result is pushed to
    ``PROCESSED_DATASET`` using ``HUGGINGFACE_AUTH_TOKEN``.

    Args:
        payload: The webhook payload delivered by the Hugging Face Hub.

    Raises:
        Exception: Any error raised while loading, merging or pushing is
            logged with its traceback and then re-raised unchanged.
    """
    if not payload.event.scope.startswith("repo"):
        return

    logger.info(f"Webhook received from {payload.repo.name} indicating a repo {payload.event.action}")

    # NOTE(review): the calls below are made synchronously from an async
    # handler, so they run on the event loop until they finish. Offloading
    # them to a thread would need a decision about overlapping webhooks.
    try:
        logger.info("Loading new dataset...")
        dataset, original_dataset = load_datasets()
        logger.info("Loaded new dataset")

        logger.info("Merging and Updating row...")
        dataset = merge_and_update_datasets(dataset, original_dataset)

        # Push the augmented dataset to the Hugging Face hub
        logger.debug("Pushing processed data to the Hugging Face Hub...")
        dataset.push_to_hub(PROCESSED_DATASET, token=HUGGINGFACE_AUTH_TOKEN)
        logger.info("Pushed processed data to the Hugging Face Hub")
    except Exception:
        # Record the failure in the log file (which the UI displays) before
        # letting the error propagate to the server as before.
        logger.exception("Webhook-triggered dataset processing failed")
        raise


if __name__ == '__main__':
    app.launch(server_name="0.0.0.0", show_error=True, server_port=7860)
    # Alternative that launches only the UI, without the webhook server:
    # ui.queue().launch(server_name="0.0.0.0", show_error=True, server_port=7860)