import os
from pathlib import Path

import gradio as gr
from huggingface_hub import WebhookPayload, WebhooksServer

from src.my_logger import setup_logger
from src.utilities import load_datasets, merge_and_update_datasets
from src.visualize_logs import log_file_to_html_string
from src.build_nomic import build_nomic

proj_dir = Path(__name__).parent

logger = setup_logger(__name__)
logger.info("Starting Application...")
SUBREDDIT = os.environ["SUBREDDIT"]
USERNAME = os.environ["USERNAME"]
OG_DATASET = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}"
PROCESSED_DATASET = os.environ["PROCESSED_DATASET"]
HUGGINGFACE_AUTH_TOKEN = os.environ["HUGGINGFACE_AUTH_TOKEN"]
WEBHOOK_SECRET = os.getenv("HF_WEBHOOK_SECRET", "secret")
intro_md = """ | |
# Processing BORU | |
This space is triggered by a webhook for changes on | |
[derek-thomas/dataset-creator-reddit-bestofredditorupdates](https://huggingface.co/datasets/derek-thomas/dataset-creator-reddit-bestofredditorupdates). | |
It then takes the updates from that dataset and get embeddings and puts the results in | |
[https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed](https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed) | |
Check out the original on [Nomic](https://atlas.nomic.ai/data/derek2/boru-subreddit-neural-search/map) | |
""" | |
html_str = """ | |
<html> | |
<head> | |
<title>conll2003</title> | |
<style> | |
body { | |
font-family: Arial, sans-serif; | |
background-color: #f0f0f0; | |
display: flex; | |
justify-content: center; | |
align-items: center; | |
height: 100vh; | |
margin: 0; | |
padding: 0; | |
color: #333; | |
} | |
.iframe-container { | |
border: 1px solid #ccc; | |
border-radius: 10px; | |
overflow: hidden; | |
width: 80%; | |
height: 80%; | |
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); | |
} | |
iframe { | |
width: 100%; | |
height: 100%; | |
border: none; | |
} | |
</style> | |
</head> | |
<body> | |
<div class="iframe-container"> | |
<iframe src="https://atlas.nomic.ai/data/derek2/boru-subreddit-neural-search/map/cdd8c890-2fac-4ea6-91f8-e6821203cfcb" allow="clipboard-read; clipboard-write" | |
title="Nomic Atlas"></iframe> | |
</div> | |
</body> | |
</html>""" | |
with gr.Blocks() as ui:
    with gr.Tab("Application"):
        gr.Markdown(intro_md)
        gr.HTML(html_str)
    with gr.Tab("Logs"):
        gr.Markdown("# Logs")
        output = gr.HTML(log_file_to_html_string, every=1)
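
# Wrap the queued UI in a WebhooksServer so the Space serves both the Gradio app and a
# webhook endpoint; incoming requests are validated against WEBHOOK_SECRET.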
app = WebhooksServer(ui=ui.queue(), webhook_secret=WEBHOOK_SECRET)


# Register the handler with the server (assumption: default path, i.e. the function name).
@app.add_webhook
async def community(payload: WebhookPayload):
    # Only react to repo-level events (e.g. a push to the source dataset)
    if payload.event.scope.startswith("repo"):
        logger.info(f"Webhook received from {payload.repo.name} indicating a repo {payload.event.action}")
    else:
        return
logger.info(f"Loading new dataset...") | |
dataset, original_dataset = load_datasets() | |
logger.info(f"Loaded new dataset") | |
logger.info(f"Merging and Updating row...") | |
dataset = merge_and_update_datasets(dataset, original_dataset) | |
# Push the augmented dataset to the Hugging Face hub | |
logger.info(f"Pushing processed data to the Hugging Face Hub...") | |
dataset.push_to_hub(PROCESSED_DATASET, token=HUGGINGFACE_AUTH_TOKEN) | |
logger.info(f"Pushed processed data to the Hugging Face Hub") | |
logger.info(f"Building Nomic...") | |
build_nomic(dataset=dataset) | |
logger.info(f"Built Nomic") | |
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", show_error=True, server_port=7860)
    # To run the UI alone, without the webhook server:
    # ui.queue().launch(server_name="0.0.0.0", show_error=True, server_port=7860)
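
# Usage note (assumption about WebhooksServer's default routing): with the bare decorator
# above, the handler is exposed at POST /webhooks/community on this Space's URL, so the
# Hub-side webhook for the source dataset should point there and send the same secret as
# HF_WEBHOOK_SECRET.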