Update app.py
app.py CHANGED
```diff
@@ -8,7 +8,6 @@ import evaluate
 bleu = evaluate.load("bleu")
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-print(HF_TOKEN)
 client = InferenceClient(model="bigcode/starcoder", token=HF_TOKEN)
 
 login(token=HF_TOKEN)
@@ -16,7 +15,7 @@ checkpoint = "bigcode/starcoder"
 tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_auth_token=True)
 
 df = pd.read_csv("samples.csv")
-
+df = df[["content"]].iloc[:50]
 
 description = "<h1 style='text-align: center; color: #333333; font-size: 40px;'>StarCoder Memorization Verifier"
 high_bleu_examples = {
@@ -190,6 +189,10 @@ def low_bleu_mirror(x):
     output = low_bleu_examples[x]
     return output
 
+def df_select(evt: gr.SelectData):
+
+    return evt.value
+
 with gr.Blocks() as demo:
     with gr.Column():
         gr.Markdown(description)
@@ -201,8 +204,15 @@ with gr.Blocks() as demo:
                     label="Original",
                 )
 
+            with gr.Column():
+                output = gr.Textbox(lines=5, label="Completion", interactive=False)
+        with gr.Row():
+            with gr.Column():
                 with gr.Accordion("Advanced parameters", open=False):
-                    k = gr.Slider(minimum=1, maximum=250, value=50
+                    k = gr.Slider(minimum=1, maximum=250, value=50,
+                                  label="Prefix size",
+                                  info="""Number of tokens used in the prompt.
+                                  Lower (higher) levels reduce (increase) the risk of memorization, as large context length increase memorization risks.""")
                 submit = gr.Button("Check", variant="primary")
                 high_bleu_examples = gr.Examples(list(high_bleu_examples.keys()), label="High memorization samples",
                                                  inputs=instruction, outputs=instruction,
@@ -211,13 +221,22 @@ with gr.Blocks() as demo:
                                                 inputs=instruction, outputs=instruction,
                                                 fn=low_bleu_mirror, cache_examples=True)
         with gr.Column():
-
-
-
-
+            label = gr.Label(value={"BLEU": 0},label="Memorization score (BLEU)")
+            gr.Markdown("""[BLEU](https://huggingface.co/spaces/evaluate-metric/bleu) score is a metric that can be used to measure similarity of two sentences.
+            Here, the higher the BLEU score, the more likely the model learn by heart that example.
+            You can reduce the Prefix size in the Advanced parameters to reduce the context length and see if the model still extracts the training sample.""")
+
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("""# More samples from The Stack.
+                The examples shown above come from [The Stack](https://huggingface.co/datasets/bigcode/the-stack-dedup), an open-source dataset of code data.
+                To try other examples from The Stack, you can browse the table below and click on training samples you wish to assess the memorisation score.""")
+                with gr.Accordion("More samples", open=False):
+                    table = gr.DataFrame(value=df, row_count=5, label="Samples from The Stack", interactive=False)
     submit.click(
         complete,
         inputs=[instruction, k],
         outputs=[output, label],
     )
+    table.select(fn=df_select, outputs=instruction)
 demo.queue(concurrency_count=16).launch(debug=True)
```
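What the change adds up to: the completion gets its own "Completion" box, the prompt length is exposed as a "Prefix size" slider, the BLEU score is explained next to the result label, and a browsable table of The Stack samples is appended at the bottom. The new `df_select` callback together with the final `table.select(fn=df_select, outputs=instruction)` line is what makes that table clickable: Gradio passes a `gr.SelectData` event to the callback, and its `.value` field (the content of the clicked cell) is written into the `instruction` Textbox. A minimal standalone sketch of the same pattern, with toy data in place of the Space's samples.csv:

```python
import gradio as gr
import pandas as pd

# Toy stand-in for the `df` the Space builds from samples.csv.
df = pd.DataFrame({"content": ["print('hello world')", "def add(a, b):\n    return a + b"]})

def df_select(evt: gr.SelectData):
    # evt.value is the text of the clicked cell; evt.index is its (row, column) position.
    return evt.value

with gr.Blocks() as demo:
    instruction = gr.Textbox(lines=5, label="Original")
    table = gr.DataFrame(value=df, label="Samples from The Stack", interactive=False)
    # Clicking a cell copies its content into the Textbox.
    table.select(fn=df_select, outputs=instruction)

demo.launch()
```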
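The click handler `submit.click(complete, inputs=[instruction, k], outputs=[output, label])` now also receives the new `k` slider, but `complete` itself is defined earlier in app.py and is not touched by this diff. Going by the slider's help text (the prompt is the first `k` tokens of the sample) and by the `bleu` metric, tokenizer, and `InferenceClient` set up at the top of the file, a plausible shape for that function is sketched below; this is an assumption for illustration, not the Space's actual implementation:

```python
# Hypothetical sketch of `complete(sample, k)` -- the real function is not shown in this diff.
def complete(sample, k):
    # Keep only the first k tokens of the training sample as the prompt.
    prefix_ids = tokenizer(sample)["input_ids"][: int(k)]
    prompt = tokenizer.decode(prefix_ids)

    # Ask StarCoder (through the Inference API client) to continue the prompt.
    completion = prompt + client.text_generation(prompt, max_new_tokens=128)

    # Compare the generation against the full original sample; a high BLEU score
    # suggests the model reproduced (memorized) the training sample.
    score = bleu.compute(predictions=[completion], references=[[sample]])["bleu"]

    # The first value fills the "Completion" Textbox, the second feeds the gr.Label.
    return completion, {"BLEU": score}
```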
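The example rows above the Check button (unchanged context in this diff) use a simple "mirror" pattern: the values shown to the user are the dict keys, and the attached function maps a clicked key back to the full stored sample, which `outputs=instruction` writes into the Textbox; `cache_examples=True` precomputes those outputs at startup. A small self-contained sketch with placeholder samples:

```python
import gradio as gr

# Placeholder samples; the Space stores real StarCoder training samples here.
low_bleu_examples = {
    "sample A": "def add(a, b):\n    return a + b",
    "sample B": "for i in range(10):\n    print(i)",
}

def low_bleu_mirror(x):
    # Map the clicked example label back to the full sample text.
    return low_bleu_examples[x]

with gr.Blocks() as demo:
    instruction = gr.Textbox(lines=5, label="Original")
    gr.Examples(
        list(low_bleu_examples.keys()),
        label="Low memorization samples",
        inputs=instruction,
        outputs=instruction,
        fn=low_bleu_mirror,
        cache_examples=True,
    )

demo.launch()
```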