Spaces:
Runtime error
Runtime error
import os | |
import pprint | |
import codecs | |
import chardet | |
import gradio as gr | |
from langchain.llms import HuggingFacePipeline | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.vectorstores import FAISS | |
from langchain import OpenAI, ConversationChain, LLMChain, PromptTemplate | |
from langchain.chains.conversation.memory import ConversationalBufferWindowMemory | |
from EdgeGPT import Chatbot | |
def get_content(input_file):
    """Return the text of ``input_file``, decoded with an auto-detected encoding.

    The file is read once as bytes, ``chardet`` guesses the encoding from those
    bytes, and the very same bytes are then decoded (the file is not reopened).

    Args:
        input_file: Path of the file to read.

    Returns:
        The decoded file content as a ``str``; an empty file yields ``''``.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the bytes are not valid in the chosen encoding.
        LookupError: If the detected encoding name is unknown to Python.
    """
    with open(input_file, 'rb') as f:
        raw_data = f.read()

    # Nothing to detect or decode for an empty file.
    if not raw_data:
        return ''

    # The detector may report no encoding at all (None) when it cannot make a
    # guess; fall back to UTF-8 rather than to a platform-dependent default.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

    # Decode the bytes already in memory instead of reading the file a second
    # time, so detection and decoding always see identical data.
    return raw_data.decode(encoding)
def create_docs(input_file):
    """Split the content of ``input_file`` into documents tagged with its name.

    Args:
        input_file: Path of the file to load and split.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.create_documents`` produces
        for the file's text, with ``{'source': <file name without directory
        and final extension>}`` supplied as the metadata.
    """
    # The source label is the file name stripped of directory and extension.
    source_name, _ = os.path.splitext(os.path.basename(input_file))
    content = get_content(input_file=input_file)

    # Chunks are measured with len() (characters), up to 1000 each, no overlap.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        length_function=len,
    )
    return splitter.create_documents(
        texts=[content],
        metadatas=[{'source': source_name}],
    )
def get_similar_docs(query, index):
    """Search ``index`` for documents similar to ``query``.

    Args:
        query: The search text.
        index: An object exposing ``similarity_search(query=...)`` that returns
            an iterable of documents with ``page_content`` and ``metadata``
            attributes (e.g. a LangChain vector store).

    Returns:
        A list of ``(page_content, metadata)`` tuples, one per matching
        document, in the order the index returned them.
    """
    similar_docs = index.similarity_search(query=query)
    # Documents carry their text in ``page_content``; there is no ``summary``
    # attribute, so reading it raised AttributeError on every query.
    return [(doc.page_content, doc.metadata) for doc in similar_docs]
def convert_to_html(similar_docs):
    """Render search results as an HTML table.

    Args:
        similar_docs: Iterable of ``(text, metadata)`` pairs, where
            ``metadata`` is a mapping containing a ``'source'`` key.

    Returns:
        A ``<table>`` string with one row per pair: the text in the first
        column and the source in the second. Rows are joined with newlines.

    Raises:
        KeyError: If a metadata mapping has no ``'source'`` entry.
    """
    # Local import keeps this function self-contained.
    from html import escape

    # Both cells come from indexed file content and file names, so they are
    # escaped: raw '<', '>' or '&' would otherwise be interpreted as markup.
    # str() lets non-string values be rendered instead of raising TypeError.
    rows = [
        '<tr><td>' + escape(str(text)) + '</td><td>'
        + escape(str(metadata['source'])) + '</td></tr>'
        for text, metadata in similar_docs
    ]
    table = '<table><thead><th>Page Content</th><th>Source</th></thead><tbody>' + \
        '\n'.join(rows) + '</tbody></table>'
    return table
def start_ui(index):
    """Launch a Gradio interface for querying ``index``.

    The interface has a two-line text box for the query and an HTML output
    that shows the matching documents as a table.

    Args:
        index: The search index passed through to ``get_similar_docs``.

    Returns:
        None.
    """
    def query_index(query):
        """Return the documents matching ``query`` rendered as an HTML table."""
        similar_docs = get_similar_docs(query=query, index=index)
        return convert_to_html(similar_docs=similar_docs)

    # Use the top-level components: the ``gr.inputs`` / ``gr.outputs``
    # namespaces are deprecated and no longer exist in newer Gradio releases.
    # The names also avoid shadowing the ``input`` builtin.
    query_box = gr.Textbox(lines=2)
    results_view = gr.HTML()

    iface = gr.Interface(fn=query_index,
                         inputs=query_box,
                         outputs=results_view)
    iface.launch()