import os

import streamlit as st
from datasets import load_dataset
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

st.set_page_config(
    page_title="Code Generation",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Read the Hugging Face token from the environment; an access token must
# never be hard-coded in source (a token committed to a repo should be revoked).
login(token=os.environ["HF_TOKEN"], add_to_git_credential=True)

st.title("Code Generation")

huggingface_dataset_name = "red1xe/code_instructions"
dataset = load_dataset(huggingface_dataset_name)

model_name = "bigcode/starcoder"


@st.cache_resource
def load_model(name: str):
    """Load the tokenizer and model once. Streamlit re-runs the whole script
    on every interaction, so caching avoids reloading the weights each time."""
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name)
    return tokenizer, model


tokenizer, original_model = load_model(model_name)

x = st.slider(label="Select a sample", min_value=0, max_value=1000, value=500, step=10)

if st.button("Show Sample"):
    index = x
    # `input` shadows the builtin, so the dataset field is renamed locally.
    sample_input = dataset["test"][index]["input"]
    instruction = dataset["test"][index]["instruction"]
    output = dataset["test"][index]["output"]
    prompt = f"""
Answer the following question.

{sample_input}
{instruction}

Answer:
"""
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = tokenizer.decode(
        original_model.generate(
            inputs["input_ids"],
            max_new_tokens=200,
        )[0],
        skip_special_tokens=True,
    )
    dash_line = "-" * 100
    st.write(dash_line)
    st.write(f"INPUT PROMPT:\n{prompt}")
    st.write(dash_line)
    st.write(f"BASELINE HUMAN ANSWER:\n{output}\n")
    st.write(dash_line)
    st.write(f"MODEL GENERATION - ZERO SHOT:\n{outputs}")
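
# --- Optional: quantitative check against the reference (sketch) ---
# The helper below is an illustrative addition, not part of the app above.
# It assumes the `evaluate` and `rouge_score` packages are installed and
# shows one way to score a generated answer against the dataset's reference.
def rouge_score(prediction: str, reference: str) -> dict:
    """Compute ROUGE between one generated answer and one reference string."""
    import evaluate

    rouge = evaluate.load("rouge")
    return rouge.compute(predictions=[prediction], references=[reference])


# Example usage (inside the "Show Sample" branch, once `outputs` and `output`
# are defined):
#     st.json(rouge_score(outputs, output))
#
# To launch the app (assuming this file is saved as app.py):
#     streamlit run app.py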