"""
The Streamlit app for the project demo.
In the demo, the user can write a prompt
 and the model will generate a response using the grouped sampling algorithm.
"""
import os
from time import time

import streamlit as st
from grouped_sampling import GroupedSamplingPipeLine
from torch.cuda import CudaError
from huggingface_hub import logging as hf_hub_logging

from available_models import AVAILABLE_MODELS
from hanlde_form_submit import on_form_submit


def create_pipeline(model_name: str, group_size: int) -> GroupedSamplingPipeLine:
    """
    Creates a pipeline with the given model name and group size.
    :param model_name: The name of the model to use.
    :param group_size: The size of the groups to use.
    :return: A pipeline with the given model name and group size.
    """
    st.write(f"Starts creating pipeline with model: {model_name}")
    pipeline_start_time = time()
    pipeline = GroupedSamplingPipeLine(
        model_name=model_name,
        group_size=group_size,
        # Keep generating until the requested length instead of stopping at the end-of-sentence token.
        end_of_sentence_stop=False,
        # Sample each token from the 50 most likely candidates.
        top_k=50,
    )
    pipeline_end_time = time()
    pipeline_time = pipeline_end_time - pipeline_start_time
    st.write(f"Finished creating pipeline with model: {model_name} in {pipeline_time:,.2f} seconds.")
    return pipeline


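# Silence Hugging Face Hub info/progress logging so model downloads do not clutter the app output.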
hf_hub_logging.set_verbosity_error()

st.set_page_config(
    page_title="Grouped Sampling - Efficient Use of Causal Language Models",
    layout="wide",
)

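# Pre-create a pipeline for every model except the first entry in AVAILABLE_MODELS;
# that first model is loaded on demand in the form handler below.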
pipelines = {
    model_name: create_pipeline(model_name, 1024) for model_name in AVAILABLE_MODELS[1:]
}

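# Widget values inside the form are only sent to the script when the submit button is pressed.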
with st.form("request_form"):
    selected_model_name: str = st.selectbox(
        label="讘讞专讜 诪讜讚诇",
        options=AVAILABLE_MODELS,
        help="llama-30b-hf generates better texts but is slower",
    )

    output_length: int = st.number_input(
        label="讻诪讜转 讛诪讬诇讬诐 讛诪拽住讬诪诇讬转 讘驻诇讟 - 讘讬谉 1 诇-1024",
        min_value=1,
        max_value=1024,
        value=5,
    )

    submitted_prompt: str = st.text_area(
        label="讛拽诇讟 诇讗诇讜讙专讬转诐 (讘讗谞讙诇讬转 讘诇讘讚)",
        value="Instruction: Answer in yes or no.\n"
              "Question: Is the sky blue?\n"
              "Answer:",
        max_chars=2048,
    )

    submitted: bool = st.form_submit_button(
        label="爪讜专 讟拽住讟",
        disabled=False,
    )

    if submitted:
        # The first model in AVAILABLE_MODELS is not pre-created above, so build its
        # pipeline lazily the first time the user selects it.
        if selected_model_name not in pipelines:
            pipelines[selected_model_name] = create_pipeline(selected_model_name, 1024)
        try:
            output = on_form_submit(
                pipelines[selected_model_name],
                output_length,
                submitted_prompt,
            )
        except CudaError:
            st.error("Out of memory. Please try a smaller model, a shorter prompt, or a smaller output length.")
        except (ValueError, TypeError, RuntimeError) as e:
            st.error(e)
        else:
            st.write(f"Generated text: {output}")


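# Render the Hebrew user instructions (from user_instructions_hebrew.md) below the form.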
user_instructions_file = os.path.join(
    os.path.dirname(__file__),
    "user_instructions_hebrew.md",
)
with open(user_instructions_file, "r", encoding="utf-8") as fh:
    long_description = fh.read()
st.markdown(long_description)