isayahc committed on
Commit
8e29230
1 Parent(s): da03ac1

added summaries to metadata

Browse files
config.py CHANGED
@@ -1,15 +1,24 @@
1
  import os
2
  from dotenv import load_dotenv
3
  from rag_app.database.db_handler import DataBaseHandler
 
4
 
5
  load_dotenv()
6
 
7
  SQLITE_FILE_NAME = os.getenv('SOURCES_CACHE')
8
  PERSIST_DIRECTORY = os.getenv('VECTOR_DATABASE_LOCATION')
9
  EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
 
10
 
11
 
12
  db = DataBaseHandler()
13
 
14
  db.create_all_tables()
15
 
 
 
 
 
 
 
 
 
1
  import os
2
  from dotenv import load_dotenv
3
  from rag_app.database.db_handler import DataBaseHandler
4
+ from langchain_huggingface import HuggingFaceEndpoint
5
 
6
  load_dotenv()
7
 
8
  SQLITE_FILE_NAME = os.getenv('SOURCES_CACHE')
9
  PERSIST_DIRECTORY = os.getenv('VECTOR_DATABASE_LOCATION')
10
  EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
11
+ SECONDARY_LLM_MODEL = os.getenv("SECONDARY_LLM_MODEL")
12
 
13
 
14
  db = DataBaseHandler()
15
 
16
  db.create_all_tables()
17
 
18
+ SECONDARY_LLM = HuggingFaceEndpoint(
19
+ repo_id=SECONDARY_LLM_MODEL,
20
+ temperature=0.1, # Controls randomness in response generation (lower value means less random)
21
+ max_new_tokens=1024, # Maximum number of new tokens to generate in responses
22
+ repetition_penalty=1.2, # Penalty for repeating the same words (higher value increases penalty)
23
+ return_full_text=False # If False, only the newly generated text is returned; if True, the input is included as well
24
+ )
rag_app/chains/__init__.py CHANGED
@@ -1 +1,2 @@
1
- from rag_app.chains.user_response_sentiment_chain import user_response_sentiment_prompt
 
 
1
+ from rag_app.chains.user_response_sentiment_chain import user_response_sentiment_prompt
2
+ from rag_app.chains.generate_document_summary import generate_document_summary_prompt
rag_app/chains/generate_document_summary.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.prompts import PromptTemplate
2
+
3
+
4
+ generate_document_summary_template = """
5
+ You will be given a document object
6
+ =================
7
+ {document}
8
+ ====================
9
+ You must generate a summary
10
+
11
+
12
+ """
13
+
14
+ generate_document_summary_prompt = PromptTemplate.from_template(generate_document_summary_template)
rag_app/knowledge_base/utils.py CHANGED
@@ -1,7 +1,39 @@
1
  from langchain_core.documents import Document
 
 
2
 
3
 
4
  def generate_document_summaries(
5
  docs: list[Document]
6
- ):
7
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from langchain_core.documents import Document
2
+ from chains import generate_document_summary_prompt
3
+ from config import SECONDARY_LLM
4
 
5
 
6
  def generate_document_summaries(
7
  docs: list[Document]
8
+ ) -> list[Document]:
9
+ """
10
+ Generates summaries for a list of Document objects and updates their metadata with the summaries.
11
+
12
+ Args:
13
+ docs (List[Document]): A list of Document objects to generate summaries for.
14
+
15
+ Returns:
16
+ List[Document]: A new list of Document objects with updated metadata containing the summaries.
17
+
18
+ Example:
19
+ docs = [Document(metadata={"title": "Doc1"}), Document(metadata={"title": "Doc2"})]
20
+ updated_docs = generate_document_summaries(docs)
21
+ for doc in updated_docs:
22
+ print(doc.metadata["summary"])
23
+
24
+ """
25
+
26
+ new_docs = docs.copy()
27
+
28
+ for doc in new_docs:
29
+
30
+ genrate_summary_chain = generate_document_summary_prompt | SECONDARY_LLM
31
+ summary = genrate_summary_chain.invoke(
32
+ {"document":str(doc.metadata)}
33
+ )
34
+
35
+ doc.metadata.update(
36
+ {"summary":summary}
37
+ )
38
+
39
+ return new_docs