File size: 2,494 Bytes
8c41dd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import os
import pprint
import codecs
import chardet
import gradio as gr
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain import OpenAI, ConversationChain, LLMChain, PromptTemplate
from langchain.chains.conversation.memory import ConversationalBufferWindowMemory
from EdgeGPT import Chatbot


def get_content(input_file):
    """Read a text file of unknown encoding and return its decoded contents.

    The file is read once as raw bytes, ``chardet`` guesses the encoding
    from those bytes, and the same bytes are then decoded with it.

    Args:
        input_file: Path of the file to read.

    Returns:
        The decoded contents of the file as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the bytes are not valid in the chosen encoding.
    """
    # Read the input file in binary mode
    with open(input_file, 'rb') as f:
        raw_data = f.read()

    # chardet reports ``None`` as the encoding when it cannot make a guess
    # (for example on an empty file); fall back to UTF-8 explicitly instead
    # of letting ``None`` select a platform-dependent default.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

    # Decode the bytes already in memory rather than re-opening the file,
    # so detection and decoding always see exactly the same data.
    return raw_data.decode(encoding)


def create_docs(input_file):
    """Split the text of *input_file* into documents tagged with its name.

    The splitter is configured with ``chunk_size=1000``, ``chunk_overlap=0``
    and ``len`` as the length function. Every produced document carries the
    metadata ``{'source': <file name without directory or extension>}``.

    Args:
        input_file: Path of the file whose text should be split.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.create_documents`` returns
        for the file's text.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
        length_function=len,
    )

    # Source label: the file name stripped of directory and last extension
    stem, _ = os.path.splitext(os.path.basename(input_file))
    content = get_content(input_file=input_file)

    # The whole file is passed as a single text with a single metadata dict
    return splitter.create_documents(
        texts=[content],
        metadatas=[{'source': stem}],
    )


def get_similar_docs(query, index):
    """Search *index* for *query* and return ``(summary, metadata)`` pairs.

    Args:
        query: Search text, forwarded as ``index.similarity_search(query=...)``.
        index: Object exposing a ``similarity_search`` method whose results
            have ``summary`` and ``metadata`` attributes.

    Returns:
        A list with one ``(summary, metadata)`` tuple per search hit, in the
        order the index returned them.
    """
    pairs = []
    for doc in index.similarity_search(query=query):
        pairs.append((doc.summary, doc.metadata))
    return pairs


def convert_to_html(similar_docs):
    """Render ``(summary, metadata)`` pairs as a two-column HTML table.

    The summary text and the source name are HTML-escaped before being
    placed in the table, so characters such as ``<``, ``>`` and ``&`` in a
    document are displayed literally instead of being parsed as markup.

    Args:
        similar_docs: Iterable of ``(summary, metadata)`` pairs, where
            ``summary`` is a string and ``metadata`` is a mapping with a
            string ``'source'`` entry.

    Returns:
        A string holding a ``<table>`` with one ``<tr>`` per pair; rows are
        joined with newlines. An empty input gives a table with an empty
        body.
    """
    # Imported here under its own name so the block is self-contained and
    # the module name ``html`` is not confused with the markup being built.
    from html import escape

    rows = [
        '<tr><td>' + escape(summary) + '</td><td>'
        + escape(metadata['source']) + '</td></tr>'
        for summary, metadata in similar_docs
    ]
    return (
        '<table><thead><th>Page Content</th><th>Source</th></thead><tbody>'
        + '\n'.join(rows)
        + '</tbody></table>'
    )


def start_ui(index):
    """Launch a Gradio interface that searches *index* for similar documents.

    The interface takes a two-line text box as input and shows the search
    hits as an HTML table.

    Args:
        index: Search index handed to ``get_similar_docs`` for every query.
    """
    def query_index(query):
        # Search the index, then render the hits as an HTML table
        hits = get_similar_docs(query=query, index=index)
        return convert_to_html(similar_docs=hits)

    # Build the interface with its input/output components and start it
    gr.Interface(
        fn=query_index,
        inputs=gr.inputs.Textbox(lines=2),
        outputs=gr.outputs.HTML(),
    ).launch()