
from pathlib import Path
import os
import shutil

import openai
import gradio as gr
from github import Github
from llama_index import (
    VectorStoreIndex,
    download_loader,
    StorageContext,
    load_index_from_storage,
)

"""# Github Configeration"""

openai.api_key = os.environ.get("OPENAI_API_KEY")
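
# Fail fast with a clear message if the key is missing, rather than hitting
# an authentication error on the first query (a small added guard):
if openai.api_key is None:
    raise EnvironmentError("Set the OPENAI_API_KEY environment variable before running.")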

# username = 'Akhil-Sharma30'


"""# Reading the Files for LLM Model"""


# Path where the site repository would be cloned in Colab
repo_dir = "/content/Akhil-Sharma30.github.io"

# Remove any leftover clone from a previous run
if os.path.exists(repo_dir):
    shutil.rmtree(repo_dir)


# An earlier approach that combined all local .md files (kept for reference):
# def combine_md_files(folder_path):
#     MarkdownReader = download_loader("MarkdownReader")
#     loader = MarkdownReader()
#
#     documents = []
#     for file_path in folder_path.glob('*.md'):
#         documents += loader.load_data(file=file_path)
#
#     return documents
#
# folder_path = Path('/content/Akhil-Sharma30.github.io/content')
# combined_documents = combine_md_files(folder_path)
# combined_documents is a list containing the contents of all .md files in the folder

RemoteReader = download_loader("RemoteReader")
loader = RemoteReader()

# Pages of the personal site to index, fetched straight from GitHub
base_url = "https://raw.githubusercontent.com/Akhil-Sharma30/Akhil-Sharma30.github.io/main"
pages = [
    "assets/README.md",
    "content/about.md",
    "content/cv.md",
    "content/post.md",
    "content/opensource.md",
    "content/supervised.md",
]

data = []
for page in pages:
    data += loader.load_data(url=f"{base_url}/{page}")


"""# Vector Embedding"""

# Build the vector index over the fetched documents
index = VectorStoreIndex.from_documents(data)

# Sanity-check the index with a couple of queries
query_engine = index.as_query_engine()
response = query_engine.query("know akhil?")
print(response)

response = query_engine.query("what is the name of the person?")
print(response)
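
# The imports bring in StorageContext and load_index_from_storage; a minimal
# sketch of the persistence step they point to, assuming a local "./storage"
# directory (the directory name is an assumption, not from the original):
index.storage_context.persist(persist_dir="./storage")
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)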

"""# ChatBot Interface"""

def chat(chat_history, user_input):
    bot_response = query_engine.query(user_input)
    # Yield the reply one character at a time so the Gradio Chatbot
    # renders it with a typing effect.
    response = ""
    for letter in bot_response.response:
        response += letter
        yield chat_history + [(user_input, response)]

with gr.Blocks() as demo:
    gr.Markdown('# Robotic Akhil')
    gr.Markdown('## "Innovating Intelligence" - Unveil the secrets of a cutting-edge ChatBot project that introduces you to the genius behind the machine. 👨🏻‍💻😎')
    gr.Markdown('> Hint: Akhil 2.0')
    gr.Markdown('## Some questions you can ask to test the Bot:')
    gr.Markdown('#### :) know akhil?')
    gr.Markdown('#### :) write about my work at Agnisys?')
    gr.Markdown('#### :) write about my work at IIT Delhi?')
    gr.Markdown('#### :) was the work in the P1 Virtual Civilization Initiative open source?')
    gr.Markdown('#### many more......')
    with gr.Tab("Knowledge Bot"):
        chatbot = gr.Chatbot()
        message = gr.Textbox(value="know akhil?", label="Input your text to build a Q&A Bot here")
        message.submit(chat, [chatbot, message], chatbot)

demo.queue().launch()


"""# **Github Setup**"""



"""## Launch Phoenix

Define your knowledge base dataset with a schema that specifies the meaning of each column (features, predictions, actuals, tags, embeddings, etc.). See the [docs](https://docs.arize.com/phoenix/) for guides on how to define your own schema and API reference on `phoenix.Schema` and `phoenix.EmbeddingColumnNames`.
"""

# # get a random sample of 500 documents (including retrieved documents)
# # this will be handled by the application in a coming release
# num_sampled_points = 500
# retrieved_document_ids = set(
#     [
#         doc_id
#         for doc_ids in query_df[":feature.[str].retrieved_document_ids:prompt"].to_list()
#         for doc_id in doc_ids
#     ]
# )
# retrieved_document_mask = database_df["document_id"].isin(retrieved_document_ids)
# num_retrieved_documents = len(retrieved_document_ids)
# num_additional_samples = num_sampled_points - num_retrieved_documents
# unretrieved_document_mask = ~retrieved_document_mask
# sampled_unretrieved_document_ids = set(
#     database_df[unretrieved_document_mask]["document_id"]
#     .sample(n=num_additional_samples, random_state=0)
#     .to_list()
# )
# sampled_unretrieved_document_mask = database_df["document_id"].isin(
#     sampled_unretrieved_document_ids
# )
# sampled_document_mask = retrieved_document_mask | sampled_unretrieved_document_mask
# sampled_database_df = database_df[sampled_document_mask]

# database_schema = px.Schema(
#     prediction_id_column_name="document_id",
#     prompt_column_names=px.EmbeddingColumnNames(
#         vector_column_name="text_vector",
#         raw_data_column_name="text",
#     ),
# )
# database_ds = px.Dataset(
#     dataframe=sampled_database_df,
#     schema=database_schema,
#     name="database",
# )

"""Define your query dataset. Because the query dataframe is in OpenInference format, Phoenix is able to infer the meaning of each column without a user-defined schema by using the `phoenix.Dataset.from_open_inference` class method."""

# query_ds = px.Dataset.from_open_inference(query_df)

"""Launch Phoenix. Follow the instructions in the cell output to open the Phoenix UI."""

# session = px.launch_app(primary=query_ds, corpus=database_ds)