"""Streamlit demo: zero-shot generation with FLAN-T5 on a code-instruction dataset.

The page lets the user pick a sample from the dataset's ``test`` split, builds
a prompt from its ``input`` and ``instruction`` fields, and shows the model's
generation next to the reference ``output`` field.

Streamlit re-executes this whole script on every widget interaction, so the
dataset and the model are obtained through ``st.cache_resource`` functions
instead of plain variables assigned inside a button branch (those would be
gone on the next rerun).
"""
import time

import evaluate
import numpy as np
import pandas as pd
import streamlit as st
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    GenerationConfig,
    TrainingArguments,
    Trainer,
)

huggingface_dataset_name = "red1xe/code_instructions"
model_name = 'google/flan-t5-base'

# Upper bound and initial position of the sample slider (clamped to the
# actual size of the test split below).
MAX_SAMPLE_INDEX = 1000
DEFAULT_SAMPLE_INDEX = 200


@st.cache_resource(show_spinner=False)
def get_dataset(name):
    """Load the Hugging Face dataset ``name`` once and reuse it across reruns."""
    return load_dataset(name)


@st.cache_resource(show_spinner=False)
def get_model_and_tokenizer(name):
    """Load the seq2seq model (in bfloat16) and its tokenizer once per process.

    Returns:
        A ``(model, tokenizer)`` tuple for the checkpoint ``name``.
    """
    model = AutoModelForSeq2SeqLM.from_pretrained(name, torch_dtype=torch.bfloat16)
    tokenizer = AutoTokenizer.from_pretrained(name)
    return model, tokenizer


def build_prompt(input_text, instruction):
    """Return the zero-shot prompt built from a sample's input and instruction."""
    return f""" Answer the following question. {input_text} {instruction} Answer: """


st.title('Code Generation')

dataset = get_dataset(huggingface_dataset_name)
test_split = dataset['test']

# Optional pre-warm: the first call performs the actual load, later calls
# (including the one in "Show Sample") return the cached objects.
if st.button("Load Model"):
    with st.spinner('Loading Model...'):
        get_model_and_tokenizer(model_name)

if len(test_split) == 0:
    st.error('The test split is empty; there is no sample to show.')
    st.stop()

# Keep the slider inside the split so the chosen index can never be out of range.
last_index = min(MAX_SAMPLE_INDEX, len(test_split) - 1)
x = st.slider('Select a sample', 0, last_index, min(DEFAULT_SAMPLE_INDEX, last_index))

if st.button("Show Sample"):
    # Fetch the model here as well: the rerun triggered by this button does not
    # see anything assigned during the earlier "Load Model" run.
    with st.spinner('Loading Model...'):
        original_model, tokenizer = get_model_and_tokenizer(model_name)

    sample = test_split[x]
    input_text = sample['input']
    instruction = sample['instruction']
    output = sample['output']

    prompt = build_prompt(input_text, instruction)

    inputs = tokenizer(prompt, return_tensors='pt')
    generated_ids = original_model.generate(
        inputs["input_ids"],
        max_new_tokens=200,
    )[0]
    outputs = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # 99 dashes, used as a visual separator between sections.
    dash_line = '-' * 99
    st.write(dash_line)
    st.write(f'INPUT PROMPT:\n{prompt}')
    st.write(dash_line)
    st.write(f'BASELINE HUMAN SUMMARY:\n{output}\n')
    st.write(dash_line)
    st.write(f'MODEL GENERATION - ZERO SHOT:\n{outputs}')