from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, pipeline import nltk.data import pandas as pd import matplotlib.pyplot as plt nltk.download('punkt') import gradio as gr from gradio.mix import Parallel tokenizer_t5 = T5Tokenizer.from_pretrained("panggi/t5-base-indonesian-summarization-cased") model_t5 = T5ForConditionalGeneration.from_pretrained("panggi/t5-base-indonesian-summarization-cased") pretrained_sentiment = "w11wo/indonesian-roberta-base-sentiment-classifier" pretrained_ner = "cahya/bert-base-indonesian-NER" sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle') sentiment_pipeline = pipeline( "sentiment-analysis", model=pretrained_sentiment, tokenizer=pretrained_sentiment, return_all_scores=True ) ner_pipeline = pipeline( "ner", model=pretrained_ner, tokenizer=pretrained_ner, grouped_entities=True ) def summ_t5(text): input_ids = tokenizer_t5.encode(text, return_tensors='pt') summary_ids = model_t5.generate(input_ids, max_length=100, num_beams=2, repetition_penalty=2.5, length_penalty=1.0, early_stopping=True, no_repeat_ngram_size=2, use_cache=True) summary_text = tokenizer_t5.decode(summary_ids[0], skip_special_tokens=True) return summary_text def sentiment_analysis(text): output = sentiment_pipeline(text) return {elm["label"]: elm["score"] for elm in output[0]} def ner(text): output = ner_pipeline(text) for elm in output: elm['entity'] = elm['entity_group'] return {"text": text, "entities": output} def sentiment_df(text): df = pd.DataFrame(columns=['Text', 'Label', 'Score']) text_list = sentence_tokenizer.tokenize(text) result = [sentiment_analysis(text) for text in text_list] labels = [] scores = [] for pred in result: idx = list(pred.values()).index(max(list(pred.values()))) labels.append(list(pred.keys())[idx]) scores.append(round(list(pred.values())[idx], 3)) df['Text'] = text_list df['Label'] = labels df['Score'] = scores return df def run(text): summ_ = summ_t5(text) sent_ = sentiment_analysis(summ_) ner_ = ner(summ_) df_ = sentiment_df(text) ner_all = ner(text) fig = plt.figure() df_.groupby(["Label"])["Text"].count().plot.pie(autopct="%.1f%%", figsize=(6,6)) return summ_, sent_, ner_, fig, ner_all, df_ if __name__ == "__main__": with gr.Blocks() as demo: gr.Markdown("""