Files changed (1)
  1. app.py +50 -19
app.py CHANGED
@@ -185,9 +185,9 @@ def find_best_parameters(eval_data, model, tokenizer, max_length=85):
 
 
 
-def run_model(inputs, tokenizer, model, num_beams=2, num_beam_groups=2, temperature=0.5, num_return_sequences=1, max_length=85):
+def run_model(inputs, tokenizer, model, num_beams=2, num_beam_groups=2, temperature=0.5, num_return_sequences=1, max_length=85, seed=42069):
     all_outputs = []
-    torch.manual_seed(42069)
+    torch.manual_seed(seed)
     for input_text in inputs:
         model_inputs = tokenizer([input_text], max_length=512, padding=True, truncation=True)
         input_ids = torch.tensor(model_inputs['input_ids']).to(device)
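Re-seeding at the top of each call pins PyTorch's RNG state, so repeated calls with the same `seed` produce identical generations on a fixed setup. A minimal sketch of the pattern (the checkpoint name is illustrative; the Space's actual fine-tuned models are not shown in this diff):

```python
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

def generate_once(text, seed=42069):
    torch.manual_seed(seed)  # reset RNG state so sampling is repeatable
    input_ids = tokenizer([text], return_tensors="pt").input_ids
    output = model.generate(input_ids, do_sample=True, temperature=0.5, max_length=85)
    return tokenizer.batch_decode(output, skip_special_tokens=True)

print(generate_once("context: Water boils at 100 degrees Celsius."))
print(generate_once("context: Water boils at 100 degrees Celsius."))  # identical output
```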
@@ -232,7 +232,7 @@ def run_model(inputs, tokenizer, model, num_beams=2, num_beam_groups=2, temperat
 
 
 @spaces.GPU
-def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_qg=1, num_return_sequences_qa=1, max_length=85):
+def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_qg=1, num_return_sequences_qa=1, max_length=85, seed=42069, optimize_questions=False):
     inputs = [
         f'context: {content}'
     ]
@@ -244,21 +244,24 @@ def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_q
         num_beam_groups=num_return_sequences_qg,
         temperature=temperature_qg,
         num_return_sequences=num_return_sequences_qg,
-        max_length=max_length
+        max_length=max_length,
+        seed=seed
     )
 
-    q_params = find_best_parameters(list(chain.from_iterable(question)), qg_model, tokenizer, max_length=max_length)
-
-    question = run_model(
-        inputs,
-        tokenizer,
-        qg_model,
-        num_beams=q_params[0],
-        num_beam_groups=q_params[1],
-        temperature=temperature_qg,
-        num_return_sequences=num_return_sequences_qg,
-        max_length=max_length
-    )
+    if optimize_questions:
+        q_params = find_best_parameters(list(chain.from_iterable(question)), qg_model, tokenizer, max_length=max_length)
+
+        question = run_model(
+            inputs,
+            tokenizer,
+            qg_model,
+            num_beams=q_params[0],
+            num_beam_groups=q_params[1],
+            temperature=temperature_qg,
+            num_return_sequences=num_return_sequences_qg,
+            max_length=max_length,
+            seed=seed
+        )
 
     inputs = list(chain.from_iterable([
         [f'question: {q} context: {content}' for q in q_set] for q_set in question
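Gating the second pass behind `optimize_questions` makes the extra parameter sweep opt-in. `find_best_parameters` itself is not part of this diff, but the `q_params[0]`/`q_params[1]` unpacking implies it returns a `(num_beams, num_beam_groups)` pair. A hypothetical sketch of such a search, using the transformers constraint that `num_beams` must be divisible by `num_beam_groups` for group beam search:

```python
# Hypothetical sketch only: the real find_best_parameters is not shown in this
# diff, and score_fn stands in for whatever quality metric it applies.
from itertools import product

def find_best_parameters_sketch(questions, score_fn):
    # group beam search requires num_beams % num_beam_groups == 0
    candidates = [(b, g) for b, g in product((2, 4, 8), (1, 2, 4)) if b % g == 0]
    return max(candidates, key=lambda bg: score_fn(questions, *bg))
```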
@@ -271,7 +274,8 @@ def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_q
         num_beam_groups=num_return_sequences_qa,
         temperature=temperature_qa,
         num_return_sequences=num_return_sequences_qa,
-        max_length=max_length
+        max_length=max_length,
+        seed=seed
     )
 
     questions = list(chain.from_iterable(question))
@@ -305,6 +309,30 @@ def create_file_download(qnas):
 
 
 with gr.Blocks(css='.hidden_input {display: none;}') as demo:
+    with gr.Row(equal_height=True):
+        gr.Markdown(
+            """
+            # QA-Generator
+            A combination of fine-tuned flan-T5(-small) models chained in sequence
+            to generate:
+
+            A) a versatile set of questions
+            B) an accurate set of matching answers
+
+            from a given piece of text content.
+
+            The idea is simple:
+
+            1. Add your content
+            2. Select the number of questions you want to generate
+            2.2 (optional) Select the number of answers you want to generate per given question
+            3. Press generate
+            4. ???
+            5. Profit
+
+            If you're satisfied with the generated data set, you can export it as TSV
+            to edit or import it into your favourite tool.
+            """)
     with gr.Row(equal_height=True):
         with gr.Group("Content"):
             content = gr.Textbox(label='Content', lines=15, placeholder='Enter text here', max_lines=10_000)
@@ -314,6 +342,8 @@ with gr.Blocks(css='.hidden_input {display: none;}') as demo:
             max_length = gr.Number(label='Max Length', value=85, minimum=1, step=1, maximum=512)
             num_return_sequences_qg = gr.Number(label='Number Questions', value=max_questions, minimum=1, step=1, maximum=max(max_questions, max_elem_value))
             num_return_sequences_qa = gr.Number(label="Number Answers", value=max_answers, minimum=1, step=1, maximum=max(max_questions, max_elem_value))
+            seed = gr.Number(label="seed", value=42069)
+            optimize_questions = gr.Checkbox(label="Optimize questions?", value=False)
 
     with gr.Row():
         gen_btn = gr.Button("Generate")
@@ -321,14 +351,14 @@ with gr.Blocks(css='.hidden_input {display: none;}') as demo:
     @gr.render(
         inputs=[
             content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa,
-            max_length
+            max_length, seed, optimize_questions
         ],
         triggers=[gen_btn.click]
     )
-    def render_results(content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa, max_length):
+    def render_results(content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa, max_length, seed, optimize_questions):
         qnas = gen(
             content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa,
-            max_length
+            max_length, seed, optimize_questions
         )
         df = gr.Dataframe(
             value=[u.values() for u in qnas],
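`@gr.render` passes the current value of each declared input positionally to the decorated function, so the two new inputs must appear in both the `inputs` list and the handler's signature (otherwise the render callback receives more arguments than it accepts, and the component objects rather than their values would be forwarded to `gen`). A minimal standalone sketch, assuming Gradio 4.x:

```python
import gradio as gr

with gr.Blocks() as demo:
    text = gr.Textbox(label="Text")
    flag = gr.Checkbox(label="Optimize?")
    btn = gr.Button("Go")

    # Each component in `inputs` becomes one positional argument of the handler.
    @gr.render(inputs=[text, flag], triggers=[btn.click])
    def show(text_value, flag_value):
        gr.Markdown(f"got {text_value!r}, optimize={flag_value}")

demo.launch()
```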
@@ -342,4 +372,5 @@ with gr.Blocks(css='.hidden_input {display: none;}') as demo:
 
 
 
+demo.queue()
 demo.launch()
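Calling `demo.queue()` before `launch()` routes events through Gradio's request queue, so concurrent visitors wait their turn instead of hitting the GPU-backed `gen` handler at once. A minimal sketch, assuming Gradio 4.x defaults:

```python
import gradio as gr

def handler(x):
    return x  # placeholder for a GPU-heavy function such as gen()

with gr.Blocks() as demo:
    inp = gr.Textbox()
    out = gr.Textbox()
    inp.submit(handler, inp, out)

demo.queue(default_concurrency_limit=1)  # process one event at a time
demo.launch()
```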
 