File size: 2,324 Bytes
9ae1b66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os
from pathlib import Path

import gradio as gr
from huggingface_hub import WebhookPayload, WebhooksServer

from src.utilities import load_datasets, merge_and_update_datasets
from src.my_logger import setup_logger
from src.visualize_logs import log_file_to_html_string

# Directory containing this script. ``__file__`` (not ``__name__``) is required
# here: ``Path(__name__)`` would build a path from the module *name*
# (e.g. "__main__"), whose parent is always ".".
proj_dir = Path(__file__).resolve().parent

logger = setup_logger(__name__)

# Required configuration: a missing variable raises KeyError at import time.
SUBREDDIT = os.environ["SUBREDDIT"]
USERNAME = os.environ["USERNAME"]
# Source dataset repo id, built from the user name and subreddit.
OG_DATASET = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}"
# Destination repo id that the processed dataset is pushed to.
PROCESSED_DATASET = os.environ['PROCESSED_DATASET']
HUGGINGFACE_AUTH_TOKEN = os.environ["HUGGINGFACE_AUTH_TOKEN"]
# NOTE(review): when HF_WEBHOOK_SECRET is unset this falls back to the
# well-known literal 'secret', so webhook requests could be forged by anyone
# who guesses it — confirm the variable is always set in deployment.
WEBHOOK_SECRET = os.getenv("HF_WEBHOOK_SECRET", 'secret')

# Markdown text shown in the "Application" tab (it is passed to gr.Markdown).
# NOTE(review): the dataset links below are hard-coded to derek-thomas
# repositories rather than derived from OG_DATASET / PROCESSED_DATASET —
# confirm they match the configured environment.
intro_md = """
# Processing BORU
This space is triggered by a webhook for changes on 
[derek-thomas/dataset-creator-reddit-bestofredditorupdates](https://huggingface.co/datasets/derek-thomas/dataset-creator-reddit-bestofredditorupdates).
 It then takes the updates from that dataset and get embeddings and puts the results in 
[https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed](https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed)
"""

# Gradio front end: one "Application" tab holding the intro text and an HTML
# component whose value is produced by log_file_to_html_string.
with gr.Blocks() as ui, gr.Tab("Application"):
    gr.Markdown(intro_md)
    # `every=1` asks Gradio to re-evaluate the callable periodically
    # (presumably once per second, per Gradio's `every` semantics).
    output = gr.HTML(log_file_to_html_string, every=1)

# Serve the queued UI through a WebhooksServer so webhook routes can be
# registered on `app`; the secret is handed to the server for request checks.
app = WebhooksServer(
    ui=ui.queue(),
    webhook_secret=WEBHOOK_SECRET,
)


@app.add_webhook("/dataset_repo")
async def community(payload: WebhookPayload):
    """Rebuild and publish the processed dataset for a repo-scoped webhook.

    Events whose scope does not start with ``"repo"`` are ignored. Otherwise
    the datasets returned by ``load_datasets`` are combined with
    ``merge_and_update_datasets`` and the result is pushed to
    ``PROCESSED_DATASET`` on the Hugging Face Hub.

    Args:
        payload: The webhook payload delivered to the ``/dataset_repo`` route.

    Returns:
        None in every case.
    """
    event = payload.event
    if not event.scope.startswith("repo"):
        # Only repository-scoped events trigger processing.
        return

    logger.info(f"Webhook received from {payload.repo.name} indicating a repo {event.action}")

    # NOTE(review): the calls below are plain synchronous calls inside an
    # async handler; if they are long-running they hold the event loop for
    # their whole duration — confirm that is acceptable.
    logger.info("Loading new dataset...")
    current_ds, original_ds = load_datasets()
    logger.info("Loaded new dataset")

    logger.info("Merging and Updating row...")
    merged_ds = merge_and_update_datasets(current_ds, original_ds)

    # Publish the merged result to the Hugging Face Hub.
    logger.debug("Pushing processed data to the Hugging Face Hub...")
    merged_ds.push_to_hub(PROCESSED_DATASET, token=HUGGINGFACE_AUTH_TOKEN)
    logger.info("Pushed processed data to the Hugging Face Hub")

if __name__ == "__main__":
    # Start the webhook server (which also serves the UI) on all interfaces,
    # port 7860.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )
    # Alternative kept for reference — launch the UI alone, without webhooks:
    # ui.queue().launch(server_name="0.0.0.0", show_error=True, server_port=7860)