import os

import evaluate
import streamlit as st
import torch
from datasets import load_dataset
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer
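
# Launch the app with: streamlit run app.py
# (assumes this file is saved as app.py)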

st.set_page_config(
    page_title="Code Generation",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Read the Hugging Face access token from the environment rather than
# hardcoding a secret in the source.
login(token=os.environ["HF_TOKEN"], add_to_git_credential=True)

st.title("Code Generation")

huggingface_dataset_name = "red1xe/code_instructions"
dataset = load_dataset(huggingface_dataset_name)
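# Each record provides 'instruction', 'input', and 'output' fields; the
# 'test' split is sampled below.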

# Use the same checkpoint for the tokenizer and the model so their
# vocabularies match.
model_name = "bigcode/starcoder"
tokenizer = AutoTokenizer.from_pretrained(model_name)
original_model = AutoModelForCausalLM.from_pretrained(model_name)
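# Note: bigcode/starcoder is a large, gated checkpoint; wrapping the two loads
# above in a function decorated with @st.cache_resource would keep Streamlit
# from reloading the weights on every rerun, and a smaller causal LM can be
# substituted for quick local testing.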

x = st.slider(label='Select a sample', min_value=0, max_value=1000, value=500, step=10)
if st.button("Show Sample"):
    index = x

    # Fields of the selected test example; 'input_text' avoids shadowing the
    # built-in input().
    input_text = dataset['test'][index]['input']
    instruction = dataset['test'][index]['instruction']
    output = dataset['test'][index]['output']

    prompt = f"""
Answer the following question.

{input_text} {instruction}

Answer:
"""

    # Tokenize the prompt and generate a zero-shot completion.
    inputs = tokenizer(prompt, return_tensors='pt').to(original_model.device)
    with torch.no_grad():
        output_ids = original_model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=200,
            pad_token_id=tokenizer.eos_token_id,
        )
    # The decoded string contains the prompt as well, since a causal LM
    # returns the full input-plus-completion sequence.
    outputs = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    dash_line = '-' * 100
    st.write(dash_line)
    st.write(f'INPUT PROMPT:\n{prompt}')
    st.write(dash_line)
    st.write(f'BASELINE HUMAN ANSWER:\n{output}\n')
    st.write(dash_line)
    st.write(f'MODEL GENERATION - ZERO SHOT:\n{outputs}')
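
    # A minimal sketch of scoring the generation against the reference answer
    # with the `evaluate` library's ROUGE metric (assumes the optional
    # rouge_score package is installed alongside evaluate).
    rouge = evaluate.load('rouge')
    scores = rouge.compute(predictions=[outputs], references=[output])
    st.write(dash_line)
    st.write(f'ROUGE SCORES:\n{scores}')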