Spaces:
Running
Running
from fasthtml.common import * | |
from fasthtml.components import * | |
import json | |
import string | |
import random | |
import jsonlines | |
def gen_random_id() -> str:
    """Return a random identifier made of eight lowercase ASCII letters.

    The letters are drawn (with replacement) from ``string.ascii_lowercase``
    using the module-level ``random`` generator, so the result is not
    suitable for anything security-sensitive.
    """
    letters = random.choices(string.ascii_lowercase, k=8)
    return "".join(letters)
def view_data(
    before,
    after,
    doc_id,
    data_source: str = None,
    data_sources=None,
    target: str = "colcontent",
):
    """Render a side-by-side "raw vs. extracted" viewer with its controls.

    Args:
        before: JSON-serializable object shown in the "Raw format" column.
        after: JSON-serializable object shown in the "Extracted format" column.
        doc_id: Index of the current sample; used as the slider value and
            echoed next to the slider.
        data_source: Entry of ``data_sources`` to mark as selected.
        data_sources: Optional iterable of data-source names. When ``None``
            no drop-down is rendered.
        target: Element id of the returned container. It is also used to
            build the input names, the ``/curated/{target}`` request URL and
            the HTMX swap target.

    Returns:
        A ``Div`` with ``id=target`` holding the control form followed by the
        two-column data display.
    """
    endpoint = f"/curated/{target}"
    selector = f"#{target}"

    # The data-source picker is optional: without a list of sources the form
    # only contains the sample slider (the first form child is then None).
    source_row = None
    if data_sources is not None:
        drop_down = Select(
            *[
                Option(ds, value=ds, selected=(ds == data_source))
                for ds in data_sources
            ],
            name=f"data_source_{target}",
            hx_get=endpoint,
            hx_target=selector,
            hx_trigger="change",
            hx_swap="innerHTML",
        )
        source_row = Div(
            Label("Data source: ", drop_down),
        )

    # The slider range is fixed at 0-9 here; hx_include sends the currently
    # selected data source along with the slider value.
    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max="9",
        value=str(doc_id),
        hx_get=endpoint,
        hx_target=selector,
        hx_trigger="change",
        hx_swap="innerHTML",
        hx_include=f'[name="data_source_{target}"]',
    )

    form = Form(
        source_row,
        Div(
            Label("Data sample: ", slider, f"{doc_id}", cls="plotly_slider"),
        ),
        cls="plotly_input_container",
    )

    pre_style = "white-space: pre-wrap; word-break: break-all;"

    # ensure_ascii=False keeps non-ASCII text readable instead of rendering it
    # as escape sequences, consistent with the other viewers in this file.
    col1 = Div(
        H3("Raw format"),
        Pre(
            json.dumps(before, indent=4, ensure_ascii=False),
            style=pre_style,
        ),
        style="width: 48%; float: left; overflow-x: auto;",
    )
    col2 = Div(
        H3("Extracted format"),
        Pre(
            json.dumps(after, indent=4, ensure_ascii=False),
            style=pre_style,
        ),
        style="width: 48%; float: right; overflow-x: auto;",
    )

    data_display = Div(
        col1,
        col2,
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id=target)
def DVS(
    left,
    header,
):
    """Render a single JSON-serializable object under a heading.

    Args:
        left: Object to pretty-print as JSON (4-space indent, non-ASCII
            characters kept as-is).
        header: Text for the ``H3`` heading placed above the display box.

    Returns:
        A ``Div`` containing the heading and a 200px-high scrollable box with
        the formatted JSON.
    """
    rendered = json.dumps(left, indent=4, ensure_ascii=False)
    json_block = Pre(
        rendered,
        style="white-space: pre-wrap; word-break: break-all;",
    )
    column = Div(json_block, style="float: left; overflow-x: auto;")
    box = Div(
        column,
        style="overflow: auto; clear: both; height: 200px; border: 1px solid #ccc; padding: 20px;",
    )
    heading = H3(header)
    return Div(heading, box, style="margin-top: 10px;")
def DV(
    left_file,
    doc_id,
    header,
    target: str = None,
):
    """Render a single-column viewer for one record of a JSON / JSON Lines file.

    Args:
        left_file: Path of the data file. Paths ending in ``"jsonl"`` are read
            with ``jsonlines``; anything else is parsed as one UTF-8 JSON
            document (presumably an array of records).
        doc_id: Index of the record to display; also the slider's value.
        header: Heading text; it is also sent back in ``hx_vals`` so the
            update request can rebuild the same view.
        target: Element id of the returned container. A random id is
            generated when omitted.

    Returns:
        A ``Div`` with ``id=target`` holding the heading, the sample slider
        and a 600px-high box with the selected record as formatted JSON.

    Raises:
        IndexError: If the file holds a list and ``doc_id`` is out of range
            (this includes an empty file).
    """
    if target is None:
        target = gen_random_id()

    # Read the whole file up front and close it deterministically; the
    # handles were previously left open until garbage collection.
    if left_file.endswith("jsonl"):
        with jsonlines.open(left_file) as reader:
            left = list(reader)
    else:
        with open(left_file, encoding="utf-8") as fh:
            left = json.load(fh)

    max_doc_id = len(left) - 1

    # NOTE(review): the swap mode is innerHTML while the component returned
    # here itself carries id=target; if the /update handler returns this same
    # component, the response is nested inside the existing element with a
    # duplicate id - confirm whether outerHTML was intended.
    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        hx_get=f"/update/{target}",
        hx_target=f"#{target}",
        hx_trigger="change",
        hx_swap="innerHTML",
        # Extra request values so the handler knows which file/header to use.
        hx_vals=json.dumps({"left_file": f"{left_file}", "header": f"{header}"}),
    )
    form = Div(
        H3(header),
        Label(
            "Data sample: ", slider, f"{doc_id} of {max_doc_id}", cls="plotly_slider"
        ),
        cls="plotly_input_container",
        style="padding: 20px;",
    )
    col1 = Div(
        Pre(
            json.dumps(left[doc_id], indent=4, ensure_ascii=False),
            style="white-space: pre-wrap; word-break: break-all;",
        ),
        style="float: left; overflow-x: auto;",
    )
    data_display = Div(
        col1,
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id=target)
def DV2(
    left_file,
    right_file,
    doc_id,
    target: str = None,
):
    """Render a two-column viewer comparing one record from two JSON files.

    Args:
        left_file: Path of the UTF-8 JSON file shown under "Raw format".
        right_file: Path of the UTF-8 JSON file shown under
            "Extracted format".
        doc_id: Index of the record to display from both files; also the
            slider's value.
        target: Element id of the returned container. A random id is
            generated when omitted.

    Returns:
        A ``Div`` with ``id=target`` holding the sample slider and a
        600px-high box with both records as formatted JSON.

    Raises:
        IndexError: If the files hold lists and ``doc_id`` is out of range
            for either of them.
    """
    if target is None:
        target = gen_random_id()

    # Close both files deterministically; the handles were previously left
    # open until garbage collection.
    with open(left_file, encoding="utf-8") as left_fh:
        left = json.load(left_fh)
    with open(right_file, encoding="utf-8") as right_fh:
        right = json.load(right_fh)

    # The slider range is derived from the left file only.
    max_doc_id = len(left) - 1

    # NOTE(review): the swap mode is innerHTML while the component returned
    # here itself carries id=target; if the /update handler returns this same
    # component, the response is nested inside the existing element with a
    # duplicate id - confirm whether outerHTML was intended.
    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        hx_get=f"/update/{target}",
        hx_target=f"#{target}",
        hx_trigger="change",
        hx_swap="innerHTML",
        # Extra request values so the handler knows which files to reload.
        hx_vals=json.dumps(
            {"left_file": f"{left_file}", "right_file": f"{right_file}"}
        ),
    )
    form = Div(
        Label(
            "Data sample: ", slider, f"{doc_id} of {max_doc_id}", cls="plotly_slider"
        ),
        cls="plotly_input_container",
        style="padding: 20px;",
    )
    col1 = Div(
        H3("Raw format", style="margin-top: 0px;"),
        Pre(
            json.dumps(left[doc_id], indent=4, ensure_ascii=False),
            style="white-space: pre-wrap; word-break: break-all;",
        ),
        style="width: 48%; float: left; overflow-x: auto;",
    )
    col2 = Div(
        H3("Extracted format", style="margin-top: 0px;"),
        Pre(
            json.dumps(right[doc_id], indent=4, ensure_ascii=False),
            style="white-space: pre-wrap; word-break: break-all;",
        ),
        style="width: 48%; float: right; overflow-x: auto;",
    )
    data_display = Div(
        col1,
        col2,
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id=target)
def update(target: str, request):
    """Rebuild a data viewer from the query parameters of ``request``.

    Reads ``doc_id_{target}`` (defaulting to 3 when absent), ``left_file``,
    ``right_file`` and ``header`` from ``request.query_params``.

    Args:
        target: Element id of the viewer being refreshed.
        request: Object exposing a ``query_params`` mapping.

    Returns:
        A one-element tuple containing the ``DV2`` view when both
        ``left_file`` and ``right_file`` are non-empty; otherwise the ``DV``
        view itself.
    """
    query = request.query_params
    # NOTE(review): int() raises ValueError on a non-numeric value.
    sample_index = int(query.get(f"doc_id_{target}", 3))
    # NOTE(review): these paths come straight from the query string and are
    # opened as files by DV/DV2 - confirm they are validated upstream.
    left_path = query.get("left_file")
    right_path = query.get("right_file")

    if not (left_path and right_path):
        # Single-file view; the header is only looked up on this path.
        return DV(left_path, sample_index, query.get("header"), target)

    # The two-file view is returned wrapped in a one-element tuple.
    return (DV2(left_path, right_path, sample_index, target),)