from langchain.document_loaders import ConfluenceLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, DistilBertTokenizer, DistilBertForQuestionAnswering
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
class ConfluenceQA:
    def init_embeddings(self) -> None:
        # Sentence-transformers MiniLM embeddings used to index the Confluence chunks.
        self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    def define_model(self) -> None:
        # HuggingFacePipeline only wraps generative pipelines ("text-generation",
        # "text2text-generation", "summarization"), so the extractive
        # "question-answering" pipeline below cannot be passed to it directly:
        # tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")
        # model = DistilBertForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad")
        # pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
        model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
        pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=512)
        self.llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={"temperature": 0})
    def store_in_vector_db(self) -> None:
        persist_directory = self.config.get("persist_directory", None)
        confluence_url = self.config.get("confluence_url", None)
        username = self.config.get("username", None)
        api_key = self.config.get("api_key", None)
        space_key = self.config.get("space_key", None)
        include_attachment = self.config.get("include_attachment", True)

        # Pull pages (and optionally attachments) from the Confluence space.
        loader = ConfluenceLoader(
            url=confluence_url, username=username, api_key=api_key
        )
        documents = loader.load(space_key=space_key, include_attachments=include_attachment, limit=100)

        # Split pages into overlapping chunks before embedding them.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
        documents = text_splitter.split_documents(documents)

        # Embed the chunks and index them in Chroma; persist_directory is optional.
        self.db = Chroma.from_documents(documents, self.embeddings, persist_directory=persist_directory)

        # Quick sanity check that similarity search returns something sensible.
        question = "How do I make a space public?"
        search_docs = self.db.similarity_search(question)
        print(search_docs[0].page_content)
    def retrieve_qa_chain(self) -> None:
        template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Helpful Answer:"""
        QA_CHAIN_PROMPT = PromptTemplate(
            template=template, input_variables=["context", "question"]
        )
        chain_type_kwargs = {"prompt": QA_CHAIN_PROMPT}
        # "stuff" concatenates the top-k retrieved chunks into a single prompt.
        self.qa = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.db.as_retriever(search_kwargs={"k": 4}),
            chain_type_kwargs=chain_type_kwargs,
        )
    def __init__(self, config: dict = None) -> None:
        # Avoid a shared mutable default argument for the configuration dict.
        self.config = config if config is not None else {}
        self.db = None
        self.embeddings = None
        self.llm = None
        self.qa = None
    def qa_bot(self, query: str):
        result = self.qa.run(query)
        return result
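
# Minimal usage sketch (an addition, not part of the original Space file): the
# config values below are placeholders for a real Confluence instance and API
# token, and the call order simply mirrors the methods defined above.
if __name__ == "__main__":
    config = {
        "persist_directory": None,
        "confluence_url": "https://your-domain.atlassian.net/wiki/",  # placeholder
        "username": "user@example.com",                               # placeholder
        "api_key": "your-confluence-api-token",                       # placeholder
        "space_key": "SPACE",                                         # placeholder
        "include_attachment": False,
    }
    confluence_qa = ConfluenceQA(config=config)
    confluence_qa.init_embeddings()
    confluence_qa.define_model()
    confluence_qa.store_in_vector_db()
    confluence_qa.retrieve_qa_chain()
    print(confluence_qa.qa_bot("How do I make a space public?"))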