seriouspark committed

Commit 8c1187a • 1 Parent(s): e69882a

make moogeul_ver2

Files changed (1):
  1. app.py +394 -0
app.py ADDED
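# MooGeulMooGeul: a Gradio chat app that walks a user from a nickname, to a word,
# to a sentence, to a short text, and finally to a long text (500+ characters),
# then runs a Korean emotion analysis over that text and saves the results
# under ./result/<nickname>/.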
import datetime
import numpy as np
import pandas as pd
import re
import json
import os
import glob

import torch
import torch.nn.functional as F
from torch.optim import Adam
from tqdm import tqdm
from torch import nn
from transformers import BertModel
from transformers import AutoTokenizer

import argparse
from bs4 import BeautifulSoup
import requests
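# split_essay_to_sentence breaks the raw essay on newlines and periods and drops
# empty fragments, so every downstream step works on a flat list of sentences.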
def split_essay_to_sentence(origin_essay):
    origin_essay_sentence = sum([[a.strip() for a in i.split('.')] for i in origin_essay.split('\n')], [])
    essay_sent = [a for a in origin_essay_sentence if len(a) > 0]
    return essay_sent

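# get_first_extraction asks the question-answering pipeline "what is the feeling?"
# for every sentence; it relies on the module-level question_answerer defined below.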
def get_first_extraction(text_sentence):
    row_dict = {}
    for row in tqdm(text_sentence):
        question = 'what is the feeling?'
        answer = question_answerer(question=question, context=row)
        row_dict[row] = answer
    return row_dict


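# get_sent_labeldata builds the emotion label maps from a local CSV.
# Assumption: ./rawdata/sentimental_label.csv is cp949-encoded, has no header,
# and appears to repeat each label over 10-row blocks, so only every 10th row is kept.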
def get_sent_labeldata():
    label = pd.read_csv('./rawdata/sentimental_label.csv', encoding='cp949', header=None)
    label[1] = label[1].apply(lambda x: re.findall(r'[가-힣]+', x)[0])
    label_dict = label[label.index % 10 == 0].set_index(0).to_dict()[1]
    emo2idx = {v: k for k, v in enumerate(label_dict.items())}
    idx2emo = {v: k[1] for k, v in emo2idx.items()}
    return emo2idx, idx2emo


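# myDataset_for_infer tokenizes each sentence with the module-level `tokenizer`
# (padded/truncated to 96 tokens) so the sentences can be batched by a DataLoader.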
class myDataset_for_infer(torch.utils.data.Dataset):
    def __init__(self, X):
        self.X = X

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        sentences = tokenizer(self.X[idx], return_tensors='pt', padding='max_length', max_length=96, truncation=True)
        return sentences


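# infer_data runs the sentiment classifier over the sentences in batches of 16
# and returns the argmax label id for every sentence.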
def infer_data(model, main_feeling_keyword):
    #ds = myDataset_for_infer()
    df_infer = myDataset_for_infer(main_feeling_keyword)

    infer_dataloader = torch.utils.data.DataLoader(df_infer, batch_size=16)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    if device == 'cuda':
        model = model.cuda()

    result_list = []
    with torch.no_grad():
        for idx, infer_input in tqdm(enumerate(infer_dataloader)):
            mask = infer_input['attention_mask'].to(device)
            input_id = infer_input['input_ids'].squeeze(1).to(device)

            output = model(input_id, mask)
            # take the argmax on the logits tensor and move it to CPU before converting to numpy
            result = torch.argmax(output.logits, axis=1).cpu().numpy()
            result_list.extend(result)
    return result_list

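# get_word_emotion_pair labels every sentence with an emotion, extracts nouns,
# adjectives and verbs with the UPOS tagging pipeline (stripping the 을/를 object
# particles), and writes the per-sentence table to
# ./result/<nickname>/<timestamp>/essay_result.csv.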
def get_word_emotion_pair(cls_model, origin_essay_sentence, idx2emo):

    import re
    # these helpers use the local `nlp` that is (re)bound below, before the .map calls
    def get_noun(sent):
        return [re.sub(r'[을를]+', '', vocab) for (vocab, pos) in nlp(sent) if len(vocab) > 1 and pos == 'NOUN']
    def get_adj(sent):
        return [re.sub(r'[을를]+', '', vocab) for (vocab, pos) in nlp(sent) if len(vocab) > 1 and pos == 'ADJ']
    def get_verb(sent):
        return [re.sub(r'[을를]+', '', vocab) for (vocab, pos) in nlp(sent) if len(vocab) > 1 and pos == 'VERB']

    result_list = infer_data(cls_model, origin_essay_sentence)
    final_result = pd.DataFrame(data={'text': origin_essay_sentence, 'label': result_list})
    final_result['emotion'] = final_result['label'].map(idx2emo)

    nlp = lambda x: [(x[t["start"]:t["end"]], t["entity_group"]) for t in pipeline(x)]
    #essay_sent_pos = [nlp(i) for i in tqdm(essay_sent)]
    #final_result['text_pos'] = essay_sent_pos
    final_result['noun_list'] = final_result['text'].map(get_noun)
    final_result['adj_list'] = final_result['text'].map(get_adj)
    final_result['verb_list'] = final_result['text'].map(get_verb)

    final_result['title'] = 'none'
    # timestamped output directory per analysis run
    file_made_dt = datetime.datetime.now()
    file_made_dt_str = datetime.datetime.strftime(file_made_dt, '%Y%m%d_%H%M%S')
    os.makedirs(f'./result/{nickname}/{file_made_dt_str}/', exist_ok=True)
    final_result.to_csv(f"./result/{nickname}/{file_made_dt_str}/essay_result.csv", index=False)

    return final_result, file_made_dt_str


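# get_essay_base_analysis reloads the per-sentence CSV (list columns come back as
# strings, hence the eval calls), counts how often each noun/adjective co-occurs
# with each emotion, and saves three vocabulary summaries next to the sentence file.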
def get_essay_base_analysis(file_made_dt_str, nickname):
    essay1 = pd.read_csv(f"./result/{nickname}/{file_made_dt_str}/essay_result.csv")
    essay1['noun_list_len'] = essay1['noun_list'].apply(lambda x: len(x))
    essay1['noun_list_uniqlen'] = essay1['noun_list'].apply(lambda x: len(set(x)))
    essay1['adj_list_len'] = essay1['adj_list'].apply(lambda x: len(x))
    essay1['adj_list_uniqlen'] = essay1['adj_list'].apply(lambda x: len(set(x)))
    essay1['vocab_all'] = essay1[['noun_list', 'adj_list']].apply(lambda x: sum((eval(x[0]), eval(x[1])), []), axis=1)
    essay1['vocab_cnt'] = essay1['vocab_all'].apply(lambda x: len(x))
    essay1['vocab_unique_cnt'] = essay1['vocab_all'].apply(lambda x: len(set(x)))
    essay1['noun_list'] = essay1['noun_list'].apply(lambda x: eval(x))
    essay1['adj_list'] = essay1['adj_list'].apply(lambda x: eval(x))
    d = essay1.groupby('title')[['noun_list', 'adj_list']].sum([]).reset_index()
    d['noun_cnt'] = d['noun_list'].apply(lambda x: len(set(x)))
    d['adj_cnt'] = d['adj_list'].apply(lambda x: len(set(x)))

    # emotion counts per sentence, grouped by title
    essay_summary = essay1.groupby(['title'])['emotion'].value_counts().unstack(level=1)

    emo_vocab_dict = {}
    for k, v in essay1[['emotion', 'noun_list']].values:
        for vocab in v:
            if (k, 'noun', vocab) not in emo_vocab_dict:
                emo_vocab_dict[(k, 'noun', vocab)] = 0

            emo_vocab_dict[(k, 'noun', vocab)] += 1

    for k, v in essay1[['emotion', 'adj_list']].values:
        for vocab in v:
            if (k, 'adj', vocab) not in emo_vocab_dict:
                emo_vocab_dict[(k, 'adj', vocab)] = 0

            emo_vocab_dict[(k, 'adj', vocab)] += 1

    vocab_emo_cnt_dict = {}
    for k, v in essay1[['emotion', 'noun_list']].values:
        for vocab in v:
            if ('noun', vocab) not in vocab_emo_cnt_dict:
                vocab_emo_cnt_dict[('noun', vocab)] = {}
            if k not in vocab_emo_cnt_dict[('noun', vocab)]:
                vocab_emo_cnt_dict[('noun', vocab)][k] = 0

            vocab_emo_cnt_dict[('noun', vocab)][k] += 1

    for k, v in essay1[['emotion', 'adj_list']].values:
        for vocab in v:
            if ('adj', vocab) not in vocab_emo_cnt_dict:
                vocab_emo_cnt_dict[('adj', vocab)] = {}
            if k not in vocab_emo_cnt_dict[('adj', vocab)]:
                vocab_emo_cnt_dict[('adj', vocab)][k] = 0

            vocab_emo_cnt_dict[('adj', vocab)][k] += 1

    vocab_emo_cnt_df = pd.DataFrame(vocab_emo_cnt_dict).T
    vocab_emo_cnt_df['total'] = vocab_emo_cnt_df.sum(axis=1)
    # per-word emotion counts, sorted by total frequency
    all_result = vocab_emo_cnt_df.sort_values(by='total', ascending=False)

    # per-word emotion counts, adjectives included
    adj_result = vocab_emo_cnt_df.sort_values(by='total', ascending=False)

    # nouns only
    noun_result = vocab_emo_cnt_df[vocab_emo_cnt_df.index.get_level_values(0) == 'noun'].sort_values(by='total', ascending=False)

    final_file_name = f"essay_all_vocab_result.csv"
    adj_file_name = f"essay_adj_vocab_result.csv"
    noun_file_name = f"essay_noun_vocab_result.csv"

    os.makedirs(f'./result/{nickname}/{file_made_dt_str}/', exist_ok=True)

    all_result.to_csv(f"./result/{nickname}/{file_made_dt_str}/essay_all_vocab_result.csv", index=False)
    adj_result.to_csv(f"./result/{nickname}/{file_made_dt_str}/essay_adj_vocab_result.csv", index=False)
    noun_result.to_csv(f"./result/{nickname}/{file_made_dt_str}/essay_noun_vocab_result.csv", index=False)

    return all_result, adj_result, noun_result, essay_summary, file_made_dt_str


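# Model setup (runs at import time, so startup downloads the Hugging Face models):
# a KorQuAD question-answering pipeline and a Korean UPOS part-of-speech tagger here,
# plus the sentence-level sentiment classifier loaded later inside all_process.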
from transformers import pipeline
#model_name = 'AlexKay/xlm-roberta-large-qa-multilingual-finedtuned-ru'
model_name = 'monologg/koelectra-base-v2-finetuned-korquad'
question_answerer = pipeline("question-answering", model=model_name)

from transformers import AutoTokenizer, AutoModelForTokenClassification, TokenClassificationPipeline
tokenizer = AutoTokenizer.from_pretrained("KoichiYasuoka/roberta-large-korean-upos")
posmodel = AutoModelForTokenClassification.from_pretrained("KoichiYasuoka/roberta-large-korean-upos")

# note: this assignment shadows transformers.pipeline imported above
pipeline = TokenClassificationPipeline(tokenizer=tokenizer,
                                       model=posmodel,
                                       aggregation_strategy="simple",
                                       task='token-classification')
nlp = lambda x: [(x[t["start"]:t["end"]], t["entity_group"]) for t in pipeline(x)]

from transformers import AutoModelForSequenceClassification
device = 'cuda' if torch.cuda.is_available() else 'cpu'

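# all_process is the end-to-end analysis: split the essay, load the fine-tuned
# 6-label sentiment classifier, label every sentence, build the word/emotion
# summaries, dump them as JSON, and return a one-line emotion-distribution message.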
def all_process(origin_essay, nickname):
    essay_sent = split_essay_to_sentence(origin_essay)
    row_dict = {}
    for row in tqdm(essay_sent):
        question = 'what is the feeling?'
        answer = question_answerer(question=question, context=row)
        row_dict[row] = answer
    emo2idx, idx2emo = get_sent_labeldata()
    # the dataset class tokenizes with the module-level tokenizer, so rebind it to
    # the tokenizer that matches the classification model
    global tokenizer
    tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')
    cls_model = AutoModelForSequenceClassification.from_pretrained('seriouspark/bert-base-multilingual-cased-finetuning-sentimental-6label')
    #cls_model = AutoModelForSequenceClassification.from_pretrained('bert-base-multilingual-cased', num_labels = 6)

    final_result, file_name_dt = get_word_emotion_pair(cls_model, essay_sent, idx2emo)
    all_result, adj_result, noun_result, essay_summary, file_made_dt_str = get_essay_base_analysis(file_name_dt, nickname)

    summary_result = pd.concat([adj_result, noun_result]).fillna(0).sort_values(by='total', ascending=False).fillna(0).reset_index()[:30]
    with open(f'./result/{nickname}/{file_name_dt}/summary.json', 'w') as f:
        json.dump(essay_summary.to_json(), f)
    with open(f'./result/{nickname}/{file_made_dt_str}/all_result.json', 'w') as f:
        json.dump(all_result.to_json(), f)
    with open(f'./result/{nickname}/{file_made_dt_str}/adj_result.json', 'w') as f:
        json.dump(adj_result.to_json(), f)
    with open(f'./result/{nickname}/{file_made_dt_str}/noun_result.json', 'w') as f:
        json.dump(noun_result.to_json(), f)
    #return essay_summary, summary_result
    total_cnt = essay_summary.sum(axis=1).values[0]
    essay_summary_list = sorted(essay_summary.T.to_dict()['none'].items(), key=lambda x: x[1], reverse=True)
    essay_summary_list_str = ' '.join([f'{row[0]} {int(row[1]*100 / total_cnt)}%' for row in essay_summary_list])
    summary1 = f"""{nickname}님, 당신의 글 속에서 느껴지는 감정분포는 [{essay_summary_list_str}] 입니다"""

    return summary1

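# get_similar_vocab scrapes the Naver dictionary search page for the entered word
# and pulls the `similarWordName` entries out of the inline script block.
# Assumption: this depends on the current dict.naver.com markup and may break
# if that page structure changes.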
def get_similar_vocab(message):
    #print(re.findall('[가-힣]+', message))
    if (len(message) > 0) & (len(re.findall('[가-힣]+', message)) > 0):
        vocab = message
        all_dict_url = f"https://dict.naver.com/search.dict?dicQuery={vocab}&query={vocab}&target=dic&ie=utf8&query_utf=&isOnlyViewEE="
        response = requests.get(all_dict_url)

        html_content = response.text
        # parse the HTML with BeautifulSoup
        soup = BeautifulSoup(html_content, 'html.parser')
        resulttext = soup.find('script').string

        # extract the words that follow "similarWordName"
        similar_words = re.findall(r'similarWordName:"([^"]+)"', resulttext)
        similar_words_final = list(set(sum([re.findall('[가-힣]+', i) for i in similar_words], [])))

        return similar_words_final
    else:
        return '단어를 입력해 주세요'

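# get_similar_means runs the same scrape but extracts the `meanList` definitions,
# keeping only purely Korean strings (no Latin letters or digits).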
def get_similar_means(vocab):

    all_dict_url = f"https://dict.naver.com/search.dict?dicQuery={vocab}&query={vocab}&target=dic&ie=utf8&query_utf=&isOnlyViewEE="
    response = requests.get(all_dict_url)

    html_content = response.text
    # parse the HTML with BeautifulSoup
    soup = BeautifulSoup(html_content, 'html.parser')
    resulttext = soup.find('script').string

    # extract the lists that follow "meanList" (as raw strings)
    mean_list_str = re.findall(r'meanList:(\[.*?\])', resulttext, re.DOTALL)

    matches_list = []
    for i in range(len(mean_list_str)):
        matches = re.findall(r'mean:"(.*?)"', mean_list_str[i])
        matches_list.append(matches)

    mean_list_str_final = [i for i in sum(matches_list, []) if (len(re.findall(r'[A-Za-z0-9]', i)) == 0) & (len(re.findall(r'[가-힣]', i)) != 0)]

    return mean_list_str_final

#info_dict = {}
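# run_all is the chat handler wired into gr.ChatInterface. It routes on the message
# prefix: '닉네임:' registers the user, '단어:' looks up similar words, '문장:' and
# '짧은글:' store intermediate writing, '긴글:' saves a 500+ character essay to disk,
# and '분석시작' runs the full analysis. State lives in the module-level info_dict
# and nickname globals, so the app effectively serves one user at a time.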
def run_all(message, history):
    global info_dict

    if message.find('닉네임:') >= 0:
        global nickname
        nickname = message.replace('닉네임', '').replace(':', '').strip()
        #global nickname
        info_dict[nickname] = {}
        return f'''좋아요! 시작할게요 {nickname}님.
지금 머릿속에 떠오르는 단어를 하나 입력해주세요.
단어를 입력할 땐 \"단어: \" 를 포함해주세요
(단어: 커피)
'''
    try:
        #print(nickname)
        if message.find('단어:') >= 0:
            clear_message = message.replace('단어', '').replace(':', '').strip()
            info_dict[nickname]['main_word'] = clear_message
            vocab_mean_list = []
            similar_words_final = get_similar_vocab(message)
            similar_words_final_with_main = similar_words_final + [message]
            if len(similar_words_final_with_main) > 0:
                for w in similar_words_final_with_main:
                    temp_means = get_similar_means(w)
                    vocab_mean_list.append(temp_means)
                fixed_similar_words_final = list(set([i for i in sum(vocab_mean_list, []) if len(i) > 10]))[:10]


                word_str = ' \n'.join([str(idx) + ") " + i for idx, i in enumerate(similar_words_final, 1)])
                sentence_str = ' \n'.join([str(idx) + ") " + i for idx, i in enumerate(fixed_similar_words_final, 1)])
                return f'''<{clear_message}> 을 활용한 글쓰기를 시작해볼까요?
우선, 유사한 단어부터 확인해볼게요.
{word_str}
\n
유사한 단어들의 뜻은 아래와 같습니다.
{sentence_str}
\n
\n

원하는 문장을 골라서 "문장:" 을 포함해 입력해주세요.
'''
            else:
                return '\"단어:\" 를 포함해서 단어를 입력해주세요 (단어: 커피)'

        elif message.find('문장:') >= 0:
            clear_message = message.replace('문장', '').replace(':', '').strip()
            info_dict[nickname]['selected_sentence'] = clear_message
            return f'''<{clear_message}>를 고르셨네요.
\n
위 문장을 활용해 짧은 글쓰기를 해볼까요?

\"짧은글: \"을 포함해 입력해주세요
(짧은글: 지금 밥을 먹고 있는 중이다)

'''

        elif message.find('짧은글:') >= 0:
            clear_message = message.replace('짧은글', '').replace(':', '').strip()
            info_dict[nickname]['short_contents'] = clear_message

            return f'''<{clear_message}>라고 입력해주셨네요.
\n 위 문장을 활용해 긴 글쓰기를 해볼까요? 500자 이상 작성해주시면 좋아요.
\n \"긴글: \"을 포함해 입력해주세요
\n (긴글: 지금 밥을 먹고 있는 중이다. 밥을 먹을때 마다 나는 밥알을 혓바닥으로 굴려본다. ... (생략) )

'''
        elif message.find('긴글:') >= 0:
            long_message = message.replace('긴글', '').replace(':', '').strip()

            length_of_lm = len(long_message)
            if length_of_lm >= 500:
                info_dict[nickname]['long_contents'] = long_message
                os.makedirs(f"./result/{nickname}/", exist_ok=True)
                with open(f"./result/{nickname}/contents.txt", 'w') as f:
                    f.write(long_message)
                return f'입력해주신 글은 {length_of_lm}자 입니다. 이 글은 분석해볼만 해요. 분석을 원하신다면 "분석시작" 이라고 입력해주세요'
            else:
                return f'입력해주신 글은 {length_of_lm}자 입니다. 분석하기에 조금 짧아요. 조금 더 입력해주시겠어요?'

        elif message.find('분석시작') >= 0:
            with open(f"./result/{nickname}/contents.txt", 'r') as f:
                origin_essay = f.read()
            # return the analysis summary so the chatbot actually replies
            return all_process(origin_essay, nickname)
        else:
            return '처음부터 시작해주세요'

    except:
        return '에러가 발생했어요. 처음부터 시작합니다. 닉네임: 을 입력해주세요'

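# Gradio wiring: a ChatInterface around run_all with Korean example prompts for each
# step. The retry_btn / undo_btn / clear_btn arguments are ChatInterface options from
# older Gradio releases; share=True exposes a public link when the app launches.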
import gradio as gr
import requests
history = []
info_dict = {}
iface = gr.ChatInterface(
    fn=run_all,
    chatbot=gr.Chatbot(),
    textbox=gr.Textbox(placeholder='챗봇의 요청 접두사를 포함하여 입력해주세요', container=True, scale=7),
    title='MooGeulMooGeul',
    description='당신의 닉네임부터 정해서 알려주세요. "닉네임: " 을 포함해서 입력해주세요.',
    theme='soft',
    examples=['닉네임: 커피러버',
              '단어: 커피',
              '문장: 일정한 주제나 줄거리를 가진 이야기',
              '짧은글: 어떤 주제나 줄거리에 대해서도 이야기를 잘 하는 사람이 하나 있었다. 나의 이모. 그 사람은 커피 한잔만 있다면 어떤 이야기든 내게 들려주었다.',
              '''긴글: 어떤 주제나 줄거리에 대해서도 이야기를 잘 하는 사람이 하나 있었다. 나의 이모. 그 사람은 커피 한 잔만 있다면 어떤 이야기든 할 수 있었다.
어린시절의 나는 그 이야기를 듣기 위해 필사적으로 집으로 돌아왔다. 유치원때는 집에 가야 한다며 떼를 쓰고 울었다고 했다.
초등학생이 되어서는 4교시 땡! 하는 소리가 들리면 가방을 재빨리 싸서 집으로 돌아왔다. 집에는 항상 나를 기다리고 있는 이모와 이모의 커피 냄새가 있었다.
따뜻한 믹스커피냄새, 그리고 고요한 집안에 울리던 이야깃거리가 생생하다. 이모는 어떻게 그 많은 이야기를 알고 있었을까.
한번은 정말 물어본 적이 있었다. 어떻게 해서 그런 이야기를 알고 있느냐고. 그럴때 마다 이모는 내게 어른이 되라고 말해줬다.

'어른이 되면 알 수 있어. 어른이 되렴.'
어른, 그 당시의 나는 장래희망으로 <어른>을 써넣을 정도였다.
'''],
    cache_examples=False,
    retry_btn=None,
    undo_btn='Delete Previous',
    clear_btn='Clear',

)
iface.launch(share=True)