red1xe committed on
Commit
f100939
1 Parent(s): 7f51a5a

Add application file

Browse files
Files changed (1) hide show
  1. app.py +43 -3
app.py CHANGED
@@ -9,8 +9,48 @@ import numpy as np
9
  import streamlit as st
10
  st.title('Code Generation')
11
  huggingface_dataset_name = "red1xe/code_instructions"
 
 
 
12
 
13
- dataset = load_dataset(huggingface_dataset_name)
 
14
 
15
- st.write("## Dataset")
16
- st.write(dataset)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st

st.title('Code Generation')
huggingface_dataset_name = "red1xe/code_instructions"

# Streamlit re-executes this script from the top on every widget interaction,
# and st.button() returns True only during the run triggered by its own click.
# Anything loaded inside a button branch must therefore be kept in
# st.session_state; a plain local would be undefined on the next rerun and
# clicking a different button would raise NameError.
if st.button("Load Dataset"):
    with st.spinner('Loading Dataset...'):
        st.session_state["dataset"] = load_dataset(huggingface_dataset_name)

# None until the user has loaded the dataset in this session.
dataset = st.session_state.get("dataset")

if st.button("Show Dataset"):
    if dataset is None:
        st.warning('Load the dataset first.')
    else:
        st.write(dataset)

if st.button("Load Model"):
    with st.spinner('Loading Model...'):
        model_name = 'google/flan-t5-base'
        st.session_state["original_model"] = AutoModelForSeq2SeqLM.from_pretrained(
            model_name, torch_dtype=torch.bfloat16
        )
        st.session_state["tokenizer"] = AutoTokenizer.from_pretrained(model_name)

# None until the user has loaded the model in this session.
original_model = st.session_state.get("original_model")
tokenizer = st.session_state.get("tokenizer")

x = st.slider('Select a sample', 0, 1000, 200)
if st.button("Show Sample"):
    if dataset is None:
        st.warning('Load the dataset first.')
    elif original_model is None or tokenizer is None:
        st.warning('Load the model first.')
    elif x >= len(dataset['test']):
        # The slider range is fixed at 0-1000, which may exceed the split size.
        # NOTE(review): assumes the dataset exposes a 'test' split - confirm.
        st.warning(
            f"Sample {x} is out of range; the test split has "
            f"{len(dataset['test'])} rows."
        )
    else:
        index = x

        sample = dataset['test'][index]
        # Named sample_input so the builtin input() is not shadowed.
        sample_input = sample['input']
        instruction = sample['instruction']
        output = sample['output']

        # Zero-shot prompt: the sample's input and instruction, no examples.
        prompt = (
            "\n"
            "Answer the following question.\n"
            "\n"
            f"{sample_input} {instruction}\n"
            "\n"
            "Answer:\n"
        )

        inputs = tokenizer(prompt, return_tensors='pt')
        outputs = tokenizer.decode(
            original_model.generate(
                inputs["input_ids"],
                max_new_tokens=200,
            )[0],
            skip_special_tokens=True,
        )

        # 99 dashes: same string the original built by joining 100 empty
        # strings with '-'.
        dash_line = '-' * 99
        st.write(dash_line)
        st.write(f'INPUT PROMPT:\n{prompt}')
        st.write(dash_line)
        st.write(f'BASELINE HUMAN SUMMARY:\n{output}\n')
        st.write(dash_line)
        st.write(f'MODEL GENERATION - ZERO SHOT:\n{outputs}')