"""
The Streamlit app for the project demo.
In the demo, the user can write a prompt
and the model will generate a response using the grouped sampling algorithm.
"""

import os
from time import time

import streamlit as st
from grouped_sampling import GroupedSamplingPipeLine
from torch.cuda import CudaError
from huggingface_hub import logging as hf_hub_logging

from available_models import AVAILABLE_MODELS
from hanlde_form_submit import on_form_submit


def create_pipeline(model_name: str, group_size: int) -> GroupedSamplingPipeLine:
    """
    Creates a pipeline with the given model name and group size.
    :param model_name: The name of the model to use.
    :param group_size: The size of the groups to use.
    :return: A pipeline with the given model name and group size.
    """
    st.write(f"Started creating pipeline with model: {model_name}")
    pipeline_start_time = time()
    pipeline = GroupedSamplingPipeLine(
        model_name=model_name,
        group_size=group_size,
        end_of_sentence_stop=False,
        top_k=50,
    )
    pipeline_end_time = time()
    pipeline_time = pipeline_end_time - pipeline_start_time
    st.write(f"Finished creating pipeline with model: {model_name} in {pipeline_time:,.2f} seconds.")
    return pipeline
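

# Keep HuggingFace Hub progress/warning logs off the page; show only errors.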
hf_hub_logging.set_verbosity_error()
st.set_page_config(
    # Hebrew page title: "Grouped sampling - efficient use of causal language models"
    page_title="דגימה בקבוצות - שימוש יעיל במודלי שפה סיבתיים",
    layout="wide",
)
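
# Pre-load a pipeline (group size 1024) for every model except the first;
# the first model is created on demand when selected (see the submit handler below).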
pipelines = {
    model_name: create_pipeline(model_name, 1024)
    for model_name in AVAILABLE_MODELS[1:]
}
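
# Request form: model choice, maximum output length, and an English prompt.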
with st.form("request_form"):
    selected_model_name: str = st.selectbox(
        # Label: "Choose a model"
        label="בחרו מודל",
        options=AVAILABLE_MODELS,
        help="llama-30b-hf generates better texts but is slower",
    )
    output_length: int = st.number_input(
        # Label: "The maximum number of words in the output - between 1 and 1024"
        label="כמות המילים המקסימלית בפלט - בין 1 ל-1024",
        min_value=1,
        max_value=1024,
        value=5,
    )
    submitted_prompt: str = st.text_area(
        # Label: "The input to the algorithm (English only)"
        label="הקלט לאלגוריתם (באנגלית בלבד)",
        value="Instruction: Answer in yes or no.\n"
              "Question: Is the sky blue?\n"
              "Answer:",
        max_chars=2048,
    )
    submitted: bool = st.form_submit_button(
        # Label: "Generate text"
        label="צור טקסט",
        disabled=False,
    )
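
# Handle submission: generate text, reporting CUDA out-of-memory and
# validation errors in the page instead of crashing the app.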
if submitted:
    # The first model is not pre-loaded above, so create its pipeline on demand.
    if selected_model_name not in pipelines:
        pipelines[selected_model_name] = create_pipeline(selected_model_name, 1024)
    try:
        output = on_form_submit(
            pipelines[selected_model_name],
            output_length,
            submitted_prompt,
        )
    except CudaError:
        st.error("Out of memory. Please try a smaller model, shorter prompt, or a smaller output length.")
    except (ValueError, TypeError, RuntimeError) as e:
        st.error(e)
    else:
        st.write(f"Generated text: {output}")
user_instructions_file = os.path.join(
    os.path.dirname(__file__),
    "user_instructions_hebrew.md",
)
with open(user_instructions_file, "r", encoding="utf-8") as fh:
    long_description = fh.read()
st.markdown(long_description)