import os
import pprint
import codecs
from html import escape

import chardet
import gradio as gr
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain import OpenAI, ConversationChain, LLMChain, PromptTemplate
from langchain.chains.conversation.memory import ConversationalBufferWindowMemory
from EdgeGPT import Chatbot


def get_content(input_file: str) -> str:
    """Return the text of *input_file*, decoded with an auto-detected encoding.

    The file is read once in binary mode, ``chardet`` guesses the encoding
    from the raw bytes, and those same bytes are then decoded.

    Args:
        input_file: Path of the file to read.

    Returns:
        The decoded file content.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the bytes are not valid in the chosen encoding.
    """
    with open(input_file, 'rb') as f:
        raw_data = f.read()

    # chardet reports ``None`` when it cannot decide (e.g. an empty file);
    # fall back to UTF-8 instead of passing ``None`` on to the decoder.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

    # Decode the bytes already in memory rather than re-opening the file.
    return raw_data.decode(encoding)


def create_docs(input_file: str):
    """Split the content of *input_file* into documents tagged with its name.

    Each chunk produced by the splitter carries ``{'source': <basename>}`` as
    metadata, where ``<basename>`` is the file name without directory or
    extension.

    Args:
        input_file: Path of the file to load and split.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.create_documents`` returns
        for the file content.
    """
    # Chunks of at most 1000 characters (measured with ``len``), no overlap.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        length_function=len,
    )

    basename = os.path.splitext(os.path.basename(input_file))[0]
    content = get_content(input_file=input_file)
    metadata = {'source': basename}

    return text_splitter.create_documents(
        texts=[content],
        metadatas=[metadata],
    )


def get_similar_docs(query, index):
    """Return ``(summary, metadata)`` pairs for the documents matching *query*.

    Args:
        query: Search text handed to ``index.similarity_search``.
        index: Object exposing ``similarity_search(query=...)``.

    Returns:
        A list of ``(d.summary, d.metadata)`` tuples, one per hit.
    """
    similar_docs = index.similarity_search(query=query)
    # NOTE(review): relies on the returned documents exposing ``.summary``;
    # confirm against the installed langchain version.
    return [(d.summary, d.metadata) for d in similar_docs]


def convert_to_html(similar_docs):
    """Render ``(summary, metadata)`` pairs as a two-column HTML table.

    Args:
        similar_docs: Iterable of ``(summary, metadata)`` pairs; each
            ``metadata`` must contain a ``'source'`` key.

    Returns:
        An HTML ``<table>`` string with a "Page Content" / "Source" header
        row followed by one row per pair.

    Raises:
        KeyError: If a metadata mapping has no ``'source'`` entry.
    """
    # NOTE(review): the original markup was lost; this table layout is
    # reconstructed from the surviving column labels - confirm it matches
    # the intended output.
    header = '<tr><th>Page Content</th><th>Source</th></tr>'
    # Escape both cells: they come from indexed file content and file names
    # and are rendered as raw HTML by the UI.
    rows = [
        '<tr><td>' + escape(str(summary)) + '</td><td>'
        + escape(str(metadata['source'])) + '</td></tr>'
        for summary, metadata in similar_docs
    ]
    return '<table>\n' + '\n'.join([header, *rows]) + '\n</table>'


def start_ui(index):
    """Launch a gradio interface that searches *index* and shows the hits.

    Args:
        index: Object exposing ``similarity_search(query=...)``; passed to
            ``get_similar_docs`` for every query typed into the UI.
    """
    def query_index(query):
        similar_docs = get_similar_docs(query=query, index=index)
        return convert_to_html(similar_docs=similar_docs)

    # Named ``query_box``/``result_view`` to avoid shadowing builtin ``input``.
    query_box = gr.inputs.Textbox(lines=2)
    result_view = gr.outputs.HTML()

    iface = gr.Interface(fn=query_index, inputs=query_box, outputs=result_view)
    iface.launch()