Spaces:
Paused
Paused
init
Browse files
- README.md +3 -3
- app.py +21 -0
- requirements.txt +4 -0
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
-
title: Openthaigpt Gpt2 Pantipwiki Poc
|
3 |
-
emoji:
|
4 |
colorFrom: purple
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.19.1
|
8 |
app_file: app.py
|
|
|
1 |
---
|
2 |
+
title: Kobkrit Openthaigpt Gpt2 Pantipwiki Poc
|
3 |
+
emoji: 🔥
|
4 |
colorFrom: purple
|
5 |
+
colorTo: yellow
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.19.1
|
8 |
app_file: app.py
|
app.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
3 |
+
import pandas as pd
|
4 |
+
import torch
|
5 |
+
from torch.utils.data import Dataset, random_split
|
6 |
+
from transformers import GPT2Tokenizer, TrainingArguments, Trainer, GPT2LMHeadModel
|
7 |
+
|
8 |
+
# Identifier of the GPT-2 checkpoint passed to `from_pretrained` below.
pretrained_name = "kobkrit/openthaigpt-gpt2-pantipwiki-poc"

# Use the GPU when one is visible and fall back to the CPU otherwise, so the
# app still starts on CPU-only hardware instead of raising inside `.cuda()`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The special tokens are registered explicitly on the tokenizer; `gen` relies
# on '<|startoftext|>' being a known token when it builds the prompt.
tokenizer = GPT2Tokenizer.from_pretrained(
    pretrained_name,
    bos_token='<|startoftext|>',
    unk_token='<|unk|>',
    eos_token='<|endoftext|>',
    pad_token='<|pad|>',
)
model = GPT2LMHeadModel.from_pretrained(pretrained_name).to(device)
# Keep the embedding matrix the same size as the tokenizer vocabulary, which
# may include the special tokens registered above.
model.resize_token_embeddings(len(tokenizer))
|
13 |
+
|
14 |
+
def gen(input):
    """Generate text from *input* using the module-level tokenizer and model.

    Args:
        input: Prompt text. '<|startoftext|>' is prepended before
            tokenization. (The name shadows the builtin but is kept so the
            function's interface is unchanged.)

    Returns:
        The first sequence returned by ``model.generate``, decoded to a
        string with special tokens stripped.
    """
    encoded = tokenizer("<|startoftext|>" + input, return_tensors="pt")
    # Follow whatever device the model lives on instead of hard-coding CUDA,
    # so the same code runs on both GPU and CPU-only hardware.
    target = model.device
    # NOTE(review): per the transformers generation docs, top_k / top_p /
    # temperature only take effect when do_sample=True; as written this is
    # plain beam search. Left unchanged to preserve output -- confirm intent.
    output = model.generate(
        encoded.input_ids.to(target),
        # Passed explicitly so generate() does not have to infer the mask.
        attention_mask=encoded.attention_mask.to(target),
        top_k=50,
        num_beams=5,
        no_repeat_ngram_size=2,
        early_stopping=True,
        max_length=300,
        top_p=0.95,
        temperature=1.9,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)
|
19 |
+
|
20 |
+
# Three-line input textbox pre-filled with an example Q/A prompt; the value
# returned by `gen` is rendered as plain text.
prompt_box = gr.Textbox(
    lines=3,
    label="Input Text",
    value="Q: อยากลดความอ้วน ทำอย่างไร\n\nA:",
)
demo = gr.Interface(fn=gen, inputs=prompt_box, outputs="text")
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers[sentencepiece]
|
2 |
+
datasets
|
3 |
+
#--extra-index-url https://download.pytorch.org/whl/cpu
|
4 |
+
torch==1.13.1
|