Asaad Almutareb committed on
Commit
7acac3e
1 Parent(s): f5d22a4

added S3 locations to example.env

Browse files

expanded test use case in test_this.py
generate summaries in German now

example.env CHANGED
@@ -3,13 +3,14 @@ HUGGINGFACEHUB_API_TOKEN=""
3
  GOOGLE_CSE_ID=""
4
  GOOGLE_API_KEY=""
5
 
6
- # AWS S3 object storage
7
- S3_LOCATION=""
8
- S3_FILE_NAME=""
9
-
10
- # Local vectorstore storage
11
- FAISS_INDEX_PATH = ""
12
 
13
  # llm and embedding models
14
- embedding_model=""
15
- llm_model=""
 
 
3
  GOOGLE_CSE_ID=""
4
  GOOGLE_API_KEY=""
5
 
6
+ # Vectorstore storage on S3 and locally
7
+ S3_LOCATION="rad-rag-demos"
8
+ FAISS_VS_NAME="vectorstores/faiss-insurance-agent-500.zip"
9
+ CHROMA_VS_NAME=""
10
+ FAISS_INDEX_PATH = "./vectorstore/faiss-insurance-agent-500"
11
+ CHROMA_DIRECTORY = "./vectorstore/chroma-insurance-agent-500"
12
 
13
  # llm and embedding models
14
+ EMBEDDING_MODEL="sentence-transformers/multi-qa-mpnet-base-dot-v1"
15
+ LLM_MODEL="mistralai/Mixtral-8x7B-Instruct-v0.1"
16
+ LLM_MODEL_ARGS=
rag_app/generate_summary.py CHANGED
@@ -24,10 +24,10 @@ def generate_keywords(document:dict,
24
 
25
  template = (
26
  """
27
- You are a SEO expert bot. Your task is to craft a meaningful list of 5 keywords to organize documents.
28
- The keywords should help us in searching and retrieving the documents later.
29
 
30
- You will only respond with the clear, concise and meaningful 5 of keywords separated by comma.
31
 
32
  <<<
33
  Document: {document}
@@ -49,10 +49,10 @@ def generate_description(document:dict,
49
 
50
  template = (
51
  """
52
- You are a SEO expert bot. Your task is to craft a meaningful summary to descripe and organize documents.
53
- The description should be a meaningful summary of the document's content and help us in searching and retrieving the documents later.
54
 
55
- You will only respond with the clear, concise and meaningful description.
56
 
57
  <<<
58
  Document: {document}
 
24
 
25
  template = (
26
  """
27
+ Du bist ein SEO-Experten-Bot. Deine Aufgabe ist es, 5 aussagekräftige Schlüsselwörtern zu erstellen, um Dokumente zu organisieren.
28
+ Die Schlüsselwörter sollen uns später beim Suchen, Filtern und Abrufen der Dokumente helfen.
29
 
30
+ Antworte nur mit 5 klaren, prägnanten und aussagekräftigen Schlüsselwörtern, getrennt durch Kommas.
31
 
32
  <<<
33
  Document: {document}
 
49
 
50
  template = (
51
  """
52
+ Du bist ein SEO-Experten-Bot. Deine Aufgabe ist es, eine aussagekräftige Zusammenfassung zu erstellen, um Dokumente zu beschreiben und zu organisieren.
53
+ Die Beschreibung sollte eine aussagekräftige Zusammenfassung des Dokumentinhalts sein und uns später beim Suchen und Abrufen der Dokumente helfen.
54
 
55
+ Antworte nur mit einer klaren, prägnanten und aussagekräftigen Beschreibung in Deutsch.
56
 
57
  <<<
58
  Document: {document}
rag_app/handle_vector_store.py CHANGED
@@ -29,11 +29,11 @@ def build_vector_store(
29
  FAISS_INDEX_PATH = db_path
30
 
31
  embeddings,chunks = create_embeddings(docs, chunk_size, chunk_overlap, embedding_model)
32
- for chunk in chunks:
33
- keywords=generate_keywords(chunk)
34
- description=generate_description(chunk)
35
- chunk.metadata['chunk_keywords']=keywords
36
- chunk.metadata['chunk_description']=description
37
 
38
  #load chunks into vector store
39
  print(f'Loading chunks into faiss vector store ...')
 
29
  FAISS_INDEX_PATH = db_path
30
 
31
  embeddings,chunks = create_embeddings(docs, chunk_size, chunk_overlap, embedding_model)
32
+ # for chunk in chunks:
33
+ # keywords=generate_keywords(chunk)
34
+ # description=generate_description(chunk)
35
+ # chunk.metadata['keywords']=keywords
36
+ # chunk.metadata['description']=description
37
 
38
  #load chunks into vector store
39
  print(f'Loading chunks into faiss vector store ...')
test_this.py CHANGED
@@ -3,13 +3,19 @@ from rag_app.create_embedding import create_embeddings
3
  from rag_app.generate_summary import generate_description, generate_keywords
4
  from rag_app.handle_vector_store import build_vector_store
5
 
6
- docs = load_docs_from_urls(["https://www.wuerttembergische.de/"],5)
 
 
 
7
 
8
- for doc in docs:
9
- keywords=generate_keywords(doc)
10
- description=generate_description(doc)
11
- doc.metadata['keywords']=keywords
12
- doc.metadata['description']=description
 
 
 
13
 
14
  build_vector_store(docs, './vectorstore/faiss-insurance-agent-1500','sentence-transformers/multi-qa-mpnet-base-dot-v1',True,1500,150)
15
 
 
3
  from rag_app.generate_summary import generate_description, generate_keywords
4
  from rag_app.handle_vector_store import build_vector_store
5
 
6
+ # 1. load the urls
7
+ # 2. build the vectorstore -> the function will create the chunking and embeddings
8
+ # 3. initialize the db retriever
9
+ # 4.
10
 
11
+ docs = load_docs_from_urls(["https://www.wuerttembergische.de/"],6)
12
+
13
+ # for doc in docs:
14
+ # keywords=generate_keywords(doc)
15
+ # description=generate_description(doc)
16
+ # doc.metadata['keywords']=keywords
17
+ # doc.metadata['description']=description
18
+ # print(doc.metadata)
19
 
20
  build_vector_store(docs, './vectorstore/faiss-insurance-agent-1500','sentence-transformers/multi-qa-mpnet-base-dot-v1',True,1500,150)
21