Spaces:
Sleeping
Sleeping
File size: 2,980 Bytes
f0671b0 fc828f1 8773ff3 798f8ba 8773ff3 798f8ba 8773ff3 32014a1 2271f96 32014a1 2271f96 28059a5 798f8ba 32014a1 fc828f1 32014a1 fc828f1 7335632 fc828f1 7335632 fc828f1 7335632 fc828f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import os
import streamlit as st
from defaults import (
PROJECT_NAME,
ARGILLA_URL,
DIBT_PARENT_APP_URL,
DATASET_URL,
DATASET_REPO_ID,
)
def project_sidebar():
    """Render the project sidebar and require a Hub token before continuing.

    Halts the Streamlit script with ``st.stop()`` in two cases: when
    ``PROJECT_NAME`` is still the ``"DEFAULT_DOMAIN"`` placeholder, and when no
    (or an empty) Hub token has been provided.

    Otherwise it draws the sidebar (title, description and link buttons) and
    stores ``project_name``, ``hub_username`` and ``hub_token`` in
    ``st.session_state``. A non-empty token is also exported as the
    ``HF_TOKEN`` environment variable.

    Raises:
        IndexError: if ``DATASET_REPO_ID`` contains no ``"/"``.
    """
    if PROJECT_NAME == "DEFAULT_DOMAIN":
        st.warning(
            "Please set up the project configuration in the parent app before proceeding."
        )
        st.stop()

    st.sidebar.subheader(f"A Data Growing Project in the domain of {PROJECT_NAME}")
    st.sidebar.markdown(
        "\nThis space helps you create a dataset seed for building diverse domain-specific datasets for aligning models.\n"
    )
    st.sidebar.link_button("📚 Dataset Repo", DATASET_URL)
    st.sidebar.link_button("🤖 Argilla Space", ARGILLA_URL)

    # Split the repo id once: first segment is the user, second the project.
    repo_parts = DATASET_REPO_ID.split("/")
    hub_username, project_name = repo_parts[0], repo_parts[1]
    st.session_state["project_name"] = project_name
    st.session_state["hub_username"] = hub_username

    hub_token = st.sidebar.text_input(
        "Hub Token", type="password", value=os.environ.get("HF_TOKEN", None)
    )
    st.session_state["hub_token"] = hub_token
    # The widget yields "" (not None) once a pre-filled field is cleared, so
    # test truthiness: an empty token must never be exported or accepted.
    if hub_token:
        # NOTE(review): os.environ is process-wide; if several user sessions
        # share this process the token may leak between them - confirm.
        os.environ["HF_TOKEN"] = hub_token

    st.sidebar.link_button(
        "🤗 Get your Hub Token", "https://huggingface.co/settings/tokens"
    )

    if all(
        (
            st.session_state.get("project_name"),
            st.session_state.get("hub_username"),
            st.session_state.get("hub_token"),
        )
    ):
        st.success(f"Using the dataset repo {hub_username}/{project_name} on the Hub")

    st.sidebar.divider()
    st.sidebar.link_button("🧑🌾 New Project", DIBT_PARENT_APP_URL)

    # Same truthiness rule as the success check above, so "" also stops here.
    if not hub_token:
        st.error("Please provide a Hub token to generate answers")
        st.stop()
def create_seed_terms(topics: list[str], perspectives: list[str]) -> list[str]:
    """Build one Self-Instruct seed term per (topic, perspective) pair.

    Terms are ordered topic-first: every perspective for the first topic,
    then every perspective for the second topic, and so on.
    """
    seed_terms: list[str] = []
    for subject in topics:
        for viewpoint in perspectives:
            seed_terms.append(f"{subject} from a {viewpoint} perspective")
    return seed_terms
def create_application_instruction(
    domain: str, system_prompt: str, examples: list[dict[str, str]]
) -> str:
    """Create the instruction for the Self-Instruct task.

    Args:
        domain: Name of the domain the assistant works in.
        system_prompt: Extra guidance appended after the domain sentence.
        examples: Dicts with ``"question"`` and ``"answer"`` entries. An
            example is skipped when either entry is missing or empty.

    Returns:
        The domain sentence followed by ``system_prompt``. When at least one
        usable example exists, an introduction line and an ``Examples:``
        section listing each example exactly once are appended.
    """
    instruction = f"AI assistant in the domain of {domain}. {system_prompt}"

    example_blocks = []
    for example in examples:
        question = example.get("question")
        answer = example.get("answer")
        # Each usable example is rendered once; incomplete ones are dropped.
        if question and answer:
            example_blocks.append(f"\n- Question: {question}\n- Answer: {answer}\n")

    if not example_blocks:
        return instruction

    # Keep the introduction from running straight into the system prompt.
    separator = "" if instruction[-1:].isspace() else " "
    examples_str = "".join(example_blocks)
    return (
        f"{instruction}{separator}"
        "Below are some examples of questions and answers "
        "that the AI assistant would generate:"
        "\nExamples:"
        f"\n{examples_str}"
    )
|