"""Gradio demo: semantic retrieval over the Qur'an using an Arabic ColBERT index.

On import the module tries to load a prebuilt RAGatouille index from
``path_to_index`` and builds a ``gr.Interface`` (``qapp``) that maps a free-text
query to the top ``k`` retrieved passages. Run the file as a script to launch
the web UI.
"""
import os
from unicodedata import normalize

import gradio as gr
from ragatouille import RAGPretrainedModel

path_to_index = 'colbert/indexes/ArColbertQuran'

# RAG stays None when the index directory is missing, so answer_fn can report
# the problem to the user instead of failing with a NameError on every query.
RAG = None
message = "waiting to load index ..."
if os.path.exists(path_to_index):
    RAG = RAGPretrainedModel.from_index(path_to_index)
    message = "index loaded!"
print(message)

k = 3  # How many documents you want to retrieve


def process_results(results):
    """Format retrieved passages as one plain-text string.

    Args:
        results: Iterable of mappings, each providing the keys
            ``document_id``, ``document_metadata`` and ``content``.

    Returns:
        One "Sura: ... Text: ..." entry per result, each followed by a blank
        line; an empty string when ``results`` is empty.
    """
    return "".join(
        f"Sura: {r['document_id']} ({r['document_metadata']}) \n Text:{r['content']}\n\n"
        for r in results
    )


def answer_fn(query):
    """Search the index for ``query`` and return the formatted top-``k`` hits.

    Args:
        query: Free-text query typed by the user.

    Returns:
        The formatted passages, an empty string for a blank query, or an
        explanatory message when the index could not be loaded at start-up.
    """
    if RAG is None:
        return f"Index not found at '{path_to_index}'; search is unavailable."
    if not query or not query.strip():
        # Nothing meaningful to retrieve for a blank query.
        return ""
    # NFKC folds compatibility forms (e.g. Arabic presentation-form ligatures)
    # into their canonical characters before the query reaches the retriever.
    results = RAG.search(query=normalize('NFKC', query), k=k)
    return process_results(results)


qapp = gr.Interface(
    fn=answer_fn,
    inputs="textbox",
    outputs="textbox",
    examples=[
        "ما أهمية كتابة المعاملات؟",
        "أخبرني عن عذاب الله للمنافقين",
        "حسن معاملة الوالدين",
        "ما معجزات سيدنا عيسى",
        "ما هو التطفيف",
        "ما قصة المؤمنين الذين قتلوا في الحفرة؟",
        "ما آداب اﻻستئذان؟",
        "النبي الذي تربى في بيت حاكم مصر",
    ],
    title="Qur'an Retrieval Demo - Semantic Search",
    # Implicit concatenation replaces the stray "\ " sequences, which were
    # invalid escapes and showed up as literal backslashes in the UI.
    description=(
        "A basic demo based on Arabic ColBERT (250k queries, normalized) and "
        "simple text of the Qur'an (also normalized). "
        "First query may take a minute, then much faster. "
        "Try to include relevant terms - this is just retrieval, not LLM chat "
        "and Qur'an is an edge case. "
        "For details, see: https://www.linkedin.com/posts/akhooli_arabic-1-million-curated-triplets-dataset-activity-7222951839774699521-PZcw"
    ),
)

if __name__ == "__main__":
    qapp.launch()