from llama_index import (
    VectorStoreIndex,
    ServiceContext,
    StorageContext,
    download_loader,
    load_index_from_storage,
)
from pathlib import Path
from github import Github
import os
import shutil
import openai
import gradio as gr

"""# Github Configuration"""

openai.api_key = os.environ.get("OPENAI_API_KEY")

# username = 'Akhil-Sharma30'

"""# Reading the Files for LLM Model"""

# Specify the path to the repository
repo_dir = "/content/Akhil-Sharma30.github.io"

# Check if the repository exists and delete it if it does
if os.path.exists(repo_dir):
    shutil.rmtree(repo_dir)

# def combine_md_files(folder_path):
#     MarkdownReader = download_loader("MarkdownReader")
#     loader = MarkdownReader()
#     md_files = [file for file in folder_path.glob('*.md')]
#     documents = []
#     for file_path in md_files:
#         document = loader.load_data(file=file_path)
#         documents += document
#     return documents

# folder_path = Path('/content/Akhil-Sharma30.github.io/content')
# combined_documents = combine_md_files(folder_path)

# combined_documents will be a list containing the contents of all .md files in the folder

RemoteReader = download_loader("RemoteReader")
loader = RemoteReader()

document1 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/assets/README.md")
document2 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/content/about.md")
document3 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/content/cv.md")
document4 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/content/post.md")
document5 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/content/opensource.md")
document6 = loader.load_data(url="https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main/content/supervised.md")

data = document1 + document2 + document3 + document4 + document5 + document6

"""# Vector Embedding"""

index = VectorStoreIndex.from_documents(data)
query_engine = index.as_query_engine()

response = query_engine.query("know akhil?")
print(response)

response = query_engine.query("what is the name of the person?")
print(response)
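"""The imports above bring in StorageContext and load_index_from_storage, but the
script rebuilds the index from scratch on every run. A minimal sketch of persisting
and reloading the index with those helpers (the ./storage directory is an
assumption, not part of the original):"""

# Persist the freshly built index to disk so later runs can skip re-embedding
index.storage_context.persist(persist_dir="./storage")

# Reload the persisted index (e.g., on a subsequent run) instead of rebuilding it
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)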
"""# ChatBot Interface"""

def chat(chat_history, user_input):
    bot_response = query_engine.query(user_input)
    # print(bot_response)
    response = ""
    # Stream the answer character by character for a typing effect
    for letter in bot_response.response:
        response += letter
        yield chat_history + [(user_input, response)]

with gr.Blocks() as demo:
    gr.Markdown('# Robotic Akhil')
    gr.Markdown('## "Innovating Intelligence" - Unveil the secrets of a cutting-edge ChatBot project that introduces you to the genius behind the machine. 👨🏻‍💻😎')
    gr.Markdown('> Hint: Akhil 2.0')
    gr.Markdown('## Some questions you can ask to test the Bot:')
    gr.Markdown('#### :) know akhil?')
    gr.Markdown('#### :) write about my work at Agnisys?')
    gr.Markdown('#### :) write about my work at IIT Delhi?')
    gr.Markdown('#### :) was the work in the P1 Virtual Civilization Initiative open source?')
    gr.Markdown('#### many more......')

    with gr.Tab("Knowledge Bot"):
        # inputbox = gr.Textbox("Input your text to build a Q&A Bot here.....")
        chatbot = gr.Chatbot()
        message = gr.Textbox("know akhil?")
        message.submit(chat, [chatbot, message], chatbot)

demo.queue().launch()

"""# **Phoenix Setup**"""

"""## Launch Phoenix

Define your knowledge base dataset with a schema that specifies the meaning of each
column (features, predictions, actuals, tags, embeddings, etc.). See the
[docs](https://docs.arize.com/phoenix/) for guides on how to define your own schema
and API reference on `phoenix.Schema` and `phoenix.EmbeddingColumnNames`.
"""

# # get a random sample of 500 documents (including retrieved documents)
# # this will be handled by the application in a coming release
# num_sampled_point = 500
# retrieved_document_ids = set(
#     [
#         doc_id
#         for doc_ids in query_df[":feature.[str].retrieved_document_ids:prompt"].to_list()
#         for doc_id in doc_ids
#     ]
# )
# retrieved_document_mask = database_df["document_id"].isin(retrieved_document_ids)
# num_retrieved_documents = len(retrieved_document_ids)
# num_additional_samples = num_sampled_point - num_retrieved_documents
# unretrieved_document_mask = ~retrieved_document_mask
# sampled_unretrieved_document_ids = set(
#     database_df[unretrieved_document_mask]["document_id"]
#     .sample(n=num_additional_samples, random_state=0)
#     .to_list()
# )
# sampled_unretrieved_document_mask = database_df["document_id"].isin(
#     sampled_unretrieved_document_ids
# )
# sampled_document_mask = retrieved_document_mask | sampled_unretrieved_document_mask
# sampled_database_df = database_df[sampled_document_mask]

# database_schema = px.Schema(
#     prediction_id_column_name="document_id",
#     prompt_column_names=px.EmbeddingColumnNames(
#         vector_column_name="text_vector",
#         raw_data_column_name="text",
#     ),
# )
# database_ds = px.Dataset(
#     dataframe=sampled_database_df,
#     schema=database_schema,
#     name="database",
# )

"""Define your query dataset. Because the query dataframe is in OpenInference format,
Phoenix is able to infer the meaning of each column without a user-defined schema by
using the `phoenix.Dataset.from_open_inference` class method."""

# query_ds = px.Dataset.from_open_inference(query_df)

"""Launch Phoenix. Follow the instructions in the cell output to open the Phoenix UI."""

# session = px.launch_app(primary=query_ds, corpus=database_ds)
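"""The commented cells above assume `import phoenix as px` plus two dataframes,
`query_df` (an OpenInference-format export of the queries) and `database_df` (the
knowledge-base documents with their embeddings), none of which this script defines.
A minimal sketch of that missing setup, commented out like the rest of this section;
the parquet file names are assumptions, not part of the original:"""

# import pandas as pd
# import phoenix as px

# # Hypothetical sources for the two dataframes referenced above
# query_df = pd.read_parquet("query_data.parquet")  # assumed OpenInference-format query export
# database_df = pd.read_parquet("database.parquet")  # assumed corpus with document_id, text, text_vector columns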