"""Tiny Gradio search tool: find sentences in an Italian text file containing a word."""
import gradio as gr
import nltk
import simplemma
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.probability import FreqDist
from simplemma import text_lemmatizer

# The 'punkt' model is required by sent_tokenize.
nltk.download('punkt')

file = "text.txt"


def get_lists(file):
    """Read *file* and return (sentences, lowercased sentences).

    The two lists are index-aligned: the lowercased copy is used for
    case-insensitive matching while the original keeps display casing.
    """
    with open(file, 'r', encoding='utf-8') as f:
        text = f.read()
    sentences = sent_tokenize(text, language='italian')
    sentences_lower = [sent.lower() for sent in sentences]
    return sentences, sentences_lower


sentences, sentences_lower = get_lists(file)


def search_engine(target):
    """Return a report of every sentence containing *target* (case-insensitive).

    Matches by substring against the lowercased sentences, but echoes the
    original-cased sentence in the output. Returns an Italian "not found"
    message when no sentence matches.
    """
    # Comprehension over the aligned lists replaces the manual index loop.
    matches = [sentences[i] for i, sent in enumerate(sentences_lower)
               if target.lower() in sent]
    if not matches:
        return f"Non ho trovato la parola '{target}' nei testi.\n"
    # join() instead of quadratic string +=; each entry keeps the
    # original "<n>: <sentence>\n\n" layout, numbering from 1.
    stringed_results = ''.join(
        f"{n}: {r}\n\n" for n, r in enumerate(matches, start=1)
    )
    # NOTE: fixed mojibake "รจ" -> "è" in the user-facing message.
    return (
        f"Ho trovato {len(matches)} "
        f"{'frasi' if len(matches) > 1 else 'frase'} "
        f"in cui è presente la parola {target}.\n\n"
        f"{stringed_results}"
    )


demo = gr.Interface(fn=search_engine, inputs='text', outputs='text')
demo.launch()