philipp-zettl committed
Commit a40a80d
Parent: 6c8898d

Update app.py

Files changed (1):
app.py (+251, −25)
app.py CHANGED
@@ -1,10 +1,15 @@
 import gradio as gr
 import torch
-import spaces
 import itertools
 import pandas as pd
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-
+import spaces
+import random
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
+from sklearn.metrics import pairwise_distances
+from collections import Counter
+from itertools import chain
+from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
+import math
 
 model_name = 'philipp-zettl/t5-small-long-qa'
 qa_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
@@ -12,17 +17,177 @@ model_name = 'philipp-zettl/t5-small-qg'
 qg_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-small')
 
+embedding_model = AutoModel.from_pretrained('sentence-transformers/paraphrase-MiniLM-L6-v2')
+embedding_tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-MiniLM-L6-v2')
+
 # Move only the student model to GPU if available
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 qa_model = qa_model.to(device)
 qg_model = qg_model.to(device)
+embedding_model = embedding_model.to(device)
 
 max_questions = 1
 max_answers = 1
+max_elem_value = 100
 
 
+
+def ngrams(sequence, n):
+    return [tuple(sequence[i:i+n]) for i in range(len(sequence)-n+1)]
+
+def count_ngrams(sequence, max_n):
+    counts = Counter()
+    for n in range(1, max_n + 1):
+        counts.update(ngrams(sequence, n))
+    return counts
+
+def self_bleu(outputs):
+    smoothing_function = SmoothingFunction().method1
+    scores = []
+    for i in range(len(outputs)):
+        references = outputs[:i] + outputs[i+1:]
+        # Avoid calculating BLEU score for empty references
+        if references:
+            scores.append(sentence_bleu(references, outputs[i], smoothing_function=smoothing_function))
+    # If all references are empty, return a default value
+    if not scores:
+        return 0
+    return sum(scores) / len(scores)
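
Note (editorial, not part of the commit): self_bleu scores each candidate against all the others with smoothed sentence-level BLEU and averages the result, so higher values mean the candidates repeat each other and lower values mean they diverge. A toy check, assuming the function above with the diff markers stripped:

    similar = [['what', 'is', 'gradio'], ['what', 'is', 'gradio', 'used', 'for']]
    diverse = [['what', 'is', 'gradio'], ['who', 'maintains', 'pytorch']]
    self_bleu(similar)  # high: the candidates share most of their n-grams
    self_bleu(diverse)  # near 0: almost no shared n-grams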
+
+def dist_n(outputs, n):
+    all_ngrams = list(chain(*[ngrams(output, n) for output in outputs]))
+    unique_ngrams = set(all_ngrams)
+    return len(unique_ngrams) / len(all_ngrams) if all_ngrams else 0
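
Note: dist_n is the usual distinct-n diversity metric, the share of unique n-grams across all candidates; higher means more varied wording. A worked example under the same assumption as above:

    outputs = [['the', 'cat', 'sat'], ['the', 'cat', 'ran']]
    dist_n(outputs, 1)  # 4 unique unigrams / 6 total = 0.67
    dist_n(outputs, 2)  # 3 unique bigrams / 4 total = 0.75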
+
+def perplexity(model, tokenizer, texts):
+    encodings = tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
+    max_length = model.config.n_positions
+    stride = 512
+    lls = []
+    for i in range(0, encodings.input_ids.size(1), stride):
+        begin_loc = max(i + stride - max_length, 0)
+        end_loc = i + stride
+        trg_len = end_loc - i
+        input_ids = encodings.input_ids[:, begin_loc:end_loc].to(model.device)
+        target_ids = input_ids.clone()
+        target_ids[:, :-trg_len] = -100
+
+        with torch.no_grad():
+            outputs = model(input_ids, labels=target_ids)
+            log_likelihood = outputs.loss * trg_len
+        lls.append(log_likelihood)
+
+    ppl = torch.exp(torch.stack(lls).sum() / end_loc)
+    return ppl.item()
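
Note: this is the sliding-window perplexity recipe usually shown for decoder-only models. Two caveats: T5-style configs (both models here are T5 variants) do not define n_positions, so model.config.n_positions would raise an AttributeError, and passing the full text as labels to a seq2seq model measures reconstruction loss rather than causal perplexity, so the value is best read as a relative fluency signal. A defensive fallback (an assumption, not in the commit) could be:

    # Hypothetical guard: fall back to a fixed window when the config has no
    # n_positions attribute (e.g. T5).
    max_length = getattr(model.config, 'n_positions', 512)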
+
+def embedding_similarity(inputs, outputs):
+    global embedding_model, embedding_tokenizer, device
+    def embed(texts):
+        inputs = embedding_tokenizer(texts, return_tensors='pt', padding=True, truncation=True).to(device)
+        with torch.no_grad():
+            outputs = embedding_model(**inputs)
+        return outputs.last_hidden_state.mean(dim=1).cpu().numpy()
+
+    input_embeddings = embed(inputs)
+    output_embeddings = embed(outputs)
+
+    similarities = pairwise_distances(input_embeddings, output_embeddings, metric='cosine')
+    return sum(similarities) / len(similarities)
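
Note: despite the function's name, sklearn's pairwise_distances(..., metric='cosine') returns cosine distances (1 − similarity), so smaller means more similar; and sum(...) / len(...) over the 2-D matrix averages across rows and returns a NumPy array rather than a scalar, which the combined score below only reduces with .mean(). A scalar similarity variant (a sketch, not in the commit) would be:

    from sklearn.metrics.pairwise import cosine_similarity
    score = float(cosine_similarity(input_embeddings, output_embeddings).mean())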
+
+def js_divergence(p, q):
+    def kl_divergence(p, q):
+        return sum(p[i] * math.log(p[i] / q[i]) for i in range(len(p)) if p[i] != 0 and q[i] != 0)
+
+    p_norm = [float(i)/sum(p) for i in p]
+    q_norm = [float(i)/sum(q) for i in q]
+
+    m = [(p_norm[i] + q_norm[i]) / 2 for i in range(len(p_norm))]
+
+    return (kl_divergence(p_norm, m) + kl_divergence(q_norm, m)) / 2
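
Note: Jensen-Shannon divergence is the symmetrized, smoothed form of KL divergence; with the natural log used here it is bounded by ln 2 ≈ 0.693, and 0 means identical distributions. Because the inner kl_divergence skips zero entries, the value is only approximate when the two supports differ. A worked toy example, assuming the function above:

    p = [1, 1]  # n-gram counts from generated text
    q = [1, 3]  # n-gram counts from reference text
    js_divergence(p, q)  # ~0.034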
+
+def evaluate_model(num_beams, num_beam_groups, model, tokenizer, eval_data, max_length=85):
+    generated_outputs = []
+
+    for input_text in eval_data:
+        input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
+        outputs = model.generate(
+            input_ids,
+            num_beams=num_beams,
+            num_beam_groups=num_beam_groups,
+            diversity_penalty=1.0,
+            max_new_tokens=max_length,
+        )
+        decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        generated_outputs.append(decoded_text.split())
+
+    # Self-BLEU for diversity
+    diversity_score = self_bleu(generated_outputs)
+
+    # Dist-1 and Dist-2 for diversity
+    dist1 = dist_n(generated_outputs, 1)
+    dist2 = dist_n(generated_outputs, 2)
+
+    # Perplexity for fluency and relevance
+    fluency_score = perplexity(model, tokenizer, [" ".join(output) for output in generated_outputs])
+
+    # Embedding similarity for contextual relevance
+    contextual_score = embedding_similarity(eval_data, [" ".join(output) for output in generated_outputs])
+
+    # Jensen-Shannon Divergence for distribution similarity
+    generated_ngrams = count_ngrams(list(chain(*generated_outputs)), 4)
+    reference_ngrams = count_ngrams(list(chain(*[tokenizer.tokenize(text) for text in eval_data])), 4)
+    all_ngrams = set(generated_ngrams.keys()).union(set(reference_ngrams.keys()))
+    p = [generated_ngrams[ngram] for ngram in all_ngrams]
+    q = [reference_ngrams[ngram] for ngram in all_ngrams]
+    jsd_score = js_divergence(p, q)
+
+    return {
+        "diversity_score": diversity_score,
+        "dist1": dist1,
+        "dist2": dist2,
+        "fluency_score": fluency_score,
+        "contextual_score": contextual_score,
+        "jsd_score": jsd_score
+    }
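
Note: the returned keys pull in different directions, which the combined score in find_best_parameters below accounts for with its signs:

    # diversity_score  - Self-BLEU; lower = more diverse (the name inverts this)
    # dist1, dist2     - higher = more varied wording
    # fluency_score    - perplexity; lower = more fluent
    # contextual_score - cosine distance matrix; see the embedding_similarity note
    # jsd_score        - lower = closer to the reference n-gram distribution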
+
+def find_best_parameters(eval_data, model, tokenizer, max_length=85):
+
+    # Parameter ranges
+    parameter_map = {
+        2: [2],
+        4: [2],
+        6: [2],  # 6x3 == 4x2
+        8: [2],  # 8x4 == 6x3 == 4x2
+        10: [2],  # 10x5 == 8x4 == 6x3 == 4x2
+    }
+
+    # Find the best parameters
+    best_score = -float('inf')
+    best_params = None
+
+    for num_beams in parameter_map.keys():
+        for num_beam_groups in parameter_map[num_beams]:
+            if num_beam_groups > num_beams:
+                continue  # num_beam_groups should not be greater than num_beams
+
+            scores = evaluate_model(num_beams, num_beam_groups, model, tokenizer, eval_data, max_length=max_length)
+            # Combine scores to determine the best parameters
+            combined_score = (scores['dist1'] + scores['dist2'] - scores['fluency_score'] + scores['contextual_score'] - scores['jsd_score']).mean()
+            print(f"num_beams={num_beams}, num_beam_groups={num_beam_groups}, avg combined score={combined_score}")
+
+            if combined_score > best_score:
+                best_score = combined_score
+                best_params = (num_beams, num_beam_groups)
+
+    print(f"Best parameters: num_beams={best_params[0]}, num_beam_groups={best_params[1]} with combined score={best_score}")
+    return best_params
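
Note: as committed, parameter_map pins num_beam_groups to 2 for every beam count (the inline comments suggest the larger group counts were dropped as redundant), so the grid search effectively tunes num_beams alone. A hypothetical call, reusing names from this file:

    eval_contexts = ['context: The sun is a star.', 'context: Water boils at 100 C.']
    best_beams, best_groups = find_best_parameters(eval_contexts, qg_model, tokenizer)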
+
+
+
+
-def run_model(inputs, tokenizer, model, temperature=0.5, num_return_sequences=1):
+def run_model(inputs, tokenizer, model, num_beams=2, num_beam_groups=2, temperature=0.5, num_return_sequences=1, max_length=85):
     all_outputs = []
+    torch.manual_seed(42069)
     for input_text in inputs:
         model_inputs = tokenizer([input_text], max_length=512, padding=True, truncation=True)
         input_ids = torch.tensor(model_inputs['input_ids']).to(device)
 
@@ -31,13 +196,31 @@ def run_model(inputs, tokenizer, model, temperature=0.5, num_return_sequences=1)
         with torch.no_grad():
             sample_output = model.generate(
                 input_ids[:1],
-                max_length=85,
-                temperature=temperature,
-                do_sample=True,
+                max_length=max_length,
+                #temperature=temperature,
+                #do_sample=True,
                 num_return_sequences=num_return_sequences,
                 low_memory=True,
-                num_beams=max(2, num_return_sequences),
+                #top_p=temperature,
+                #num_beams=max(2, num_return_sequences),
                 use_cache=True,
+                # Contrastive search
+                #penalty_alpha=0.6,
+                #top_k=4,
+                # Multi-nomial sampling
+                #do_sample=True,
+                #num_beams=1,
+                # Beam search
+                #num_beams=5,
+                # Beam search multinomial sampling
+                #num_beams=5,
+                #do_sample=True,
+                # Diverse Beam search decoding
+                num_beams=max(2, num_return_sequences),
+                num_beam_groups=max(2, num_return_sequences),
+                diversity_penalty=temperature,
+                #do_sample=True,
+
             )
             for i, sample_output in enumerate(sample_output):
                 sample_output = sample_output.unsqueeze(0)
232
 
233
 
234
  @spaces.GPU
235
+ def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_qg=1, num_return_sequences_qa=1, max_length=85):
236
  inputs = [
237
  f'context: {content}'
238
  ]
239
+ question = run_model(
240
+ inputs,
241
+ tokenizer,
242
+ qg_model,
243
+ num_beams=num_return_sequences_qg,
244
+ num_beam_groups=num_return_sequences_qg,
245
+ temperature=temperature_qg,
246
+ num_return_sequences=num_return_sequences_qg,
247
+ max_length=max_length
248
+ )
249
+
250
+ q_params = find_best_parameters(list(chain.from_iterable(question)), qg_model, tokenizer, max_length=max_length)
251
+
252
+ question = run_model(
253
+ inputs,
254
+ tokenizer,
255
+ qg_model,
256
+ num_beams=q_params[0],
257
+ num_beam_groups=q_params[1],
258
+ temperature=temperature_qg,
259
+ num_return_sequences=num_return_sequences_qg,
260
+ max_length=max_length
261
+ )
262
 
263
+ inputs = list(chain.from_iterable([
264
+ [f'question: {q} context: {content}' for q in q_set] for q_set in question
265
  ]))
266
+ answer = run_model(
267
+ inputs,
268
+ tokenizer,
269
+ qa_model,
270
+ num_beams=num_return_sequences_qa,
271
+ num_beam_groups=num_return_sequences_qa,
272
+ temperature=temperature_qa,
273
+ num_return_sequences=num_return_sequences_qa,
274
+ max_length=max_length
275
+ )
276
 
277
+ questions = list(chain.from_iterable(question))
278
+ answers = list(chain.from_iterable(answer))
279
 
280
  results = []
281
  for idx, ans in enumerate(answers):
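
Note: gen now runs question generation twice: a first pass seeds find_best_parameters with candidate questions, then a second pass regenerates with the tuned (num_beams, num_beam_groups). The QA prompt also re-attaches the context explicitly ('question: {q} context: {content}') instead of reusing inputs[0]. The data flow, as a sketch:

    # 'context: <text>'                --QG--> candidate questions
    # candidate questions              --find_best_parameters--> (beams, groups)
    # 'context: <text>'                --QG with tuned params--> final questions
    # 'question: <q> context: <text>'  --QA--> answers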
 
@@ -70,8 +284,9 @@ def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_q
 
 
 def variable_outputs(k, max_elems=10):
+    global max_elem_value
     k = int(k)
-    return [gr.Text(visible=True)] * k + [gr.Text(visible=False)] * (max(max_elems, 10)- k)
+    return [gr.Text(visible=True)] * k + [gr.Text(visible=False)] * (max(max_elems, max_elem_value)- k)
 
 
 def set_outputs(content, max_elems=10):
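
Note: with max_elem_value = 100 the helper now always returns 100 gr.Text components in total, the first k visible and the rest hidden, presumably so downstream event wiring sees a fixed component count:

    variable_outputs(3)  # 3 visible gr.Text + 97 hidden ones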
 
@@ -89,22 +304,32 @@ def create_file_download(qnas):
     return 'qnas.tsv'
 
 
-with gr.Blocks() as demo:
+with gr.Blocks(css='.hidden_input {display: none;}') as demo:
     with gr.Row(equal_height=True):
         with gr.Group("Content"):
             content = gr.Textbox(label='Content', lines=15, placeholder='Enter text here', max_lines=10_000)
         with gr.Group("Settings"):
-            temperature_qg = gr.Slider(label='Temperature QG', value=0.5, minimum=0, maximum=1, step=0.01)
-            temperature_qa = gr.Slider(label='Temperature QA', value=0.75, minimum=0, maximum=1, step=0.01)
-            num_return_sequences_qg = gr.Number(label='Number Questions', value=max_questions, minimum=1, step=1, maximum=max(max_questions, 10))
-            num_return_sequences_qa = gr.Number(label="Number Answers", value=max_answers, minimum=1, step=1, maximum=max(max_questions, 10))
+            temperature_qg = gr.Slider(label='Temperature QG', value=0.2, minimum=0, maximum=1, step=0.01)
+            temperature_qa = gr.Slider(label='Temperature QA', value=0.5, minimum=0, maximum=1, step=0.01)
+            max_length = gr.Number(label='Max Length', value=85, minimum=1, step=1, maximum=512)
+            num_return_sequences_qg = gr.Number(label='Number Questions', value=max_questions, minimum=1, step=1, maximum=max(max_questions, max_elem_value))
+            num_return_sequences_qa = gr.Number(label="Number Answers", value=max_answers, minimum=1, step=1, maximum=max(max_questions, max_elem_value))
 
     with gr.Row():
         gen_btn = gr.Button("Generate")
 
-    @gr.render(inputs=[content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa], triggers=[gen_btn.click])
-    def render_results(content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa):
-        qnas = gen(content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa)
+    @gr.render(
+        inputs=[
+            content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa,
+            max_length
+        ],
+        triggers=[gen_btn.click]
+    )
+    def render_results(content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa, max_length):
+        qnas = gen(
+            content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa,
+            max_length
+        )
         df = gr.Dataframe(
             value=[u.values() for u in qnas],
            headers=['Question', 'Answer'],
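
Note: @gr.render with triggers=[gen_btn.click] re-runs render_results on every click and rebuilds the components created inside it, so the whole QG/QA pipeline executes per click and the Dataframe and DownloadButton are recreated with fresh values. A minimal sketch of the pattern (not from the commit):

    with gr.Blocks() as sketch:
        n = gr.Number(value=2)
        btn = gr.Button('Go')

        @gr.render(inputs=[n], triggers=[btn.click])
        def show(n):
            for i in range(int(n)):
                gr.Text(value=f'row {i}')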
 
@@ -116,4 +341,5 @@ with gr.Blocks() as demo:
         download = gr.DownloadButton(label='Download (without headers)', value=create_file_download(pd_df))
 
 
-demo.launch()
+
+demo.launch()