import gradio as gr
from transformers import AutoTokenizer
import json
from functools import partial

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
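# The Zephyr tokenizer above supplies the special tokens (bos_token, and
# eos_token == "</s>") that several of the chat templates below reference
# at render time.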

demo_conversation = """[
    {"role": "system", "content": "You are a helpful chatbot."},
    {"role": "user", "content": "Hi there!"},
    {"role": "assistant", "content": "Hello, human!"},
    {"role": "user", "content": "Can I ask a question?"}
]"""

chat_templates = {
    "chatml": """{% for message in messages %}
    {{ "<|im_start|>" + message["role"] + "\\n" + message["content"] + "<|im_end|>\\n" }}
{% endfor %}
{% if add_generation_prompt %}
    {{ "<|im_start|>assistant\\n" }}
{% endif %}""",
    "zephyr": """{% for message in messages %}
{% if message['role'] == 'user' %}
{{ '<|user|>\n' + message['content'] + eos_token }}
{% elif message['role'] == 'system' %}
{{ '<|system|>\n' + message['content'] + eos_token }}
{% elif message['role'] == 'assistant' %}
{{ '<|assistant|>\n'  + message['content'] + eos_token }}
{% endif %}
{% if loop.last and add_generation_prompt %}
{{ '<|assistant|>' }}
{% endif %}
{% endfor %}""",
    "llama": """{% if messages[0]['role'] == 'system' %}
{% set loop_messages = messages[1:] %}
{% set system_message = messages[0]['content'] %}
{% else %}
{% set loop_messages = messages %}
{% set system_message = false %}
{% endif %}
{% for message in loop_messages %}
{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
{% endif %}
{% if loop.index0 == 0 and system_message != false %}
{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}
{% else %}
{% set content = message['content'] %}
{% endif %}
{% if message['role'] == 'user' %}
{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}
{% elif message['role'] == 'assistant' %}
{{ ' ' + content.strip() + ' ' + eos_token }}
{% endif %}
{% endfor %}""",
    "alpaca": """{% for message in messages %}
{% if message['role'] == 'system' %}
{{ message['content'] + '\n\n' }}
{% elif message['role'] == 'user' %}
{{ '### Instruction:\n' + message['content'] + '\n\n' }}
{% elif message['role'] == 'assistant' %}
{{ '### Response:\n'  + message['content'] + '\n\n' }}
{% endif %}
{% if loop.last and add_generation_prompt %}
{{ '### Response:\n' }}
{% endif %}
{% endfor %}""",
    "vicuna": """{% for message in messages %}
{% if message['role'] == 'system' %}
{{ message['content'] + '\n' }}
{% elif message['role'] == 'user' %}
{{ 'USER:\n' + message['content'] + '\n' }}
{% elif message['role'] == 'assistant' %}
{{ 'ASSISTANT:\n'  + message['content'] + '\n' }}
{% endif %}
{% if loop.last and add_generation_prompt %}
{{ 'ASSISTANT:\n' }}
{% endif %}
{% endfor %}""",
    "falcon": """{% for message in messages %}
{% if not loop.first %}
{{ '\n' }}
{% endif %}
{% if message['role'] == 'system' %}
{{ 'System: ' }}
{% elif message['role'] == 'user' %}
{{ 'User: ' }}
{% elif message['role'] == 'assistant' %}
{{ 'Falcon: ' }}
{% endif %}
{{ message['content'] }}
{% endfor %}
{% if add_generation_prompt %}
{{ '\n' + 'Falcon:' }}
{% endif %}"""
}
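
# The templates above are Jinja2. When rendered via apply_chat_template, they
# see the conversation as `messages`, the add_generation_prompt flag, and the
# tokenizer's special tokens (bos_token, eos_token). Newlines inside Jinja
# string literals are spelled with a backslash escape ("\\n" in this Python
# source), leaving a literal \n in the template text for Jinja to decode at
# render time; that way the whitespace-cleanup option below, which strips and
# re-joins the template's lines, cannot swallow intended newlines.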
description_text = """# Chat Template Creator

### This space is a helper app for writing [Chat Templates](https://huggingface.co/docs/transformers/main/en/chat_templating).

### When you're happy with the output of your template, you can use the generated code snippet to open a PR that adds it to a model repo!"""

def apply_chat_template(template, test_conversation, add_generation_prompt, cleanup_whitespace):
    if cleanup_whitespace:
        # Strip per-line indentation and re-join, so templates can be written
        # multi-line for readability without emitting stray whitespace.
        template = "".join(line.strip() for line in template.split("\n"))
    tokenizer.chat_template = template
    conversation = json.loads(test_conversation)
    pr_snippet = "\n".join((
        "CHECKPOINT = \"big-ai-company/cool-new-model\"\n"
        "tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)",
        # repr() so quotes and escapes inside the template survive as valid Python
        f"tokenizer.chat_template = {template!r}",
        "tokenizer.push_to_hub(CHECKPOINT, create_pr=True)",
    ))
    formatted = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=add_generation_prompt)
    return formatted, pr_snippet
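
# With the default ChatML template and the demo conversation (generation
# prompt off), `formatted` comes out as:
#   <|im_start|>system
#   You are a helpful chatbot.<|im_end|>
#   <|im_start|>user
#   Hi there!<|im_end|>
#   <|im_start|>assistant
#   Hello, human!<|im_end|>
#   <|im_start|>user
#   Can I ask a question?<|im_end|>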

def load_template(template_name):
    # Blocks callbacks return new values; Gradio routes them to the components
    # named in `outputs` when the event is wired up below. Assigning to
    # template_in.value directly has no effect on the live UI.
    return chat_templates[template_name]

with gr.Blocks() as demo:

    gr.Markdown(description_text)

    with gr.Row():
        gr.Markdown("### Pick an existing template to start:")
    with gr.Row():
        load_chatml = gr.Button("ChatML")
        load_zephyr = gr.Button("Zephyr")
        load_llama = gr.Button("LLaMA")
    with gr.Row():
        load_alpaca = gr.Button("Alpaca")
        load_vicuna = gr.Button("Vicuna")
        load_falcon = gr.Button("Falcon")
    with gr.Row():
        with gr.Column():
            template_in = gr.TextArea(value=chat_templates["chatml"], lines=10, max_lines=30, label="Chat Template")
            conversation_in = gr.TextArea(value=demo_conversation, lines=6, label="Conversation")
            generation_prompt_check = gr.Checkbox(value=False, label="Add generation prompt")
            cleanup_whitespace_check = gr.Checkbox(value=True, label="Cleanup template whitespace")
            submit = gr.Button("Apply template", variant="primary")
        with gr.Column():
            formatted_out = gr.TextArea(label="Formatted conversation")
            code_snippet_out = gr.TextArea(label="Code snippet to create PR", lines=3, show_label=True, show_copy_button=True)
    submit.click(
        fn=apply_chat_template,
        inputs=[template_in, conversation_in, generation_prompt_check, cleanup_whitespace_check],
        outputs=[formatted_out, code_snippet_out],
    )
    # Each loader button returns its template text into the template editor.
    load_chatml.click(fn=partial(load_template, "chatml"), outputs=template_in)
    load_zephyr.click(fn=partial(load_template, "zephyr"), outputs=template_in)
    load_llama.click(fn=partial(load_template, "llama"), outputs=template_in)
    load_alpaca.click(fn=partial(load_template, "alpaca"), outputs=template_in)
    load_vicuna.click(fn=partial(load_template, "vicuna"), outputs=template_in)
    load_falcon.click(fn=partial(load_template, "falcon"), outputs=template_in)


demo.launch()
