Farid Karimli committed on
Commit
45d48e2
1 Parent(s): f7c75d4

Dockerfile and minor changes

Browse files
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11
2
+
3
+ # WORKDIR creates the directory; copy the app into it, then open permissions so it is writable
4
+ WORKDIR /apps/ai_tutor
5
+ COPY apps/ai_tutor /apps/ai_tutor
6
+ RUN chmod -R 777 /apps/ai_tutor
7
+
8
+ RUN pip install --upgrade pip
9
+ RUN pip install edubotics-core
10
+
11
+ # Create a logs directory (if not already present) and set permissions
12
+ RUN mkdir -p logs && chmod 777 logs
13
+
14
+ # Create a world-writable cache directory at the filesystem root (/.cache)
15
+ RUN mkdir /.cache && chmod -R 777 /.cache
16
+
17
+ # Expose the port the app runs on
18
+ EXPOSE 7860
19
+ # Require these build secrets (required=true); the RUN steps have no command, so nothing reads them here
20
+ RUN --mount=type=secret,id=HUGGINGFACEHUB_API_TOKEN,mode=0444,required=true
21
+ RUN --mount=type=secret,id=OPENAI_API_KEY,mode=0444,required=true
22
+ RUN --mount=type=secret,id=CHAINLIT_URL,mode=0444,required=true
23
+ RUN --mount=type=secret,id=LITERAL_API_URL,mode=0444,required=true
24
+ RUN --mount=type=secret,id=LLAMA_CLOUD_API_KEY,mode=0444,required=true
25
+ RUN --mount=type=secret,id=OAUTH_GOOGLE_CLIENT_ID,mode=0444,required=true
26
+ RUN --mount=type=secret,id=OAUTH_GOOGLE_CLIENT_SECRET,mode=0444,required=true
27
+ RUN --mount=type=secret,id=LITERAL_API_KEY_LOGGING,mode=0444,required=true
28
+ RUN --mount=type=secret,id=CHAINLIT_AUTH_SECRET,mode=0444,required=true
29
+
30
+ # Default command: build the vector store from the config files, then start the app
31
+ CMD vectorstore_creator --config_file config/config.yml --project_config_file config/project_config.yml && python app.py
README.md CHANGED
@@ -4,7 +4,7 @@
4
 
5
  This repository contains an LLM-based chatbot designed for the [DS701: Tools for Data Science](https://tools4ds.github.io/fa2024/) course at Boston University. The chatbot serves as an interactive learning assistant, helping students with course-related queries, explanations of concepts, and guidance on data science tools. It is powered by [edubotics-core](https://github.com/edubotics-ai/edubotics-core) a package of modules for data loading, vector store creation and management.
6
 
7
- The chatbot will be available on HuggingFace Spaces soon. Lecture slides are available [here].
8
 
9
  ## Features
10
 
@@ -46,7 +46,9 @@ Note: Make sure to use Python 3.9 or later.
46
 
47
  `pip install edubotics-core`
48
 
49
- 4. Run the chatbot
 
 
50
 
51
  ```python
52
  cd apps/ai_tutor
 
4
 
5
  This repository contains an LLM-based chatbot designed for the [DS701: Tools for Data Science](https://tools4ds.github.io/fa2024/) course at Boston University. The chatbot serves as an interactive learning assistant, helping students with course-related queries, explanations of concepts, and guidance on data science tools. It is powered by [edubotics-core](https://github.com/edubotics-ai/edubotics-core) a package of modules for data loading, vector store creation and management.
6
 
7
+ The chatbot will be available on HuggingFace Spaces soon. Lecture slides are available [here](https://tools4ds.github.io/DS701-Course-Notes/).
8
 
9
  ## Features
10
 
 
46
 
47
  `pip install edubotics-core`
48
 
49
+ 4. Set up environment variables. See `apps/ai_tutor/.env.example` for reference.
50
+
51
+ 5. Run the chatbot
52
 
53
  ```python
54
  cd apps/ai_tutor
apps/ai_tutor/app.py CHANGED
@@ -387,4 +387,4 @@ mount_chainlit(app=app, target="chainlit_app.py", path=CHAINLIT_PATH)
387
  if __name__ == "__main__":
388
  import uvicorn
389
 
390
- uvicorn.run(app, host="127.0.0.1", port=8000)
 
387
  if __name__ == "__main__":
388
  import uvicorn
389
 
390
+ uvicorn.run(app, host="0.0.0.0", port=7860)
apps/ai_tutor/config/config.yml CHANGED
@@ -3,12 +3,12 @@ log_chunk_dir: 'storage/logs/chunks' # str
3
  device: 'cpu' # str [cuda, cpu]
4
 
5
  vectorstore:
6
- load_from_HF: True # bool
7
  reparse_files: True # bool
8
  data_path: 'storage/data' # str
9
  url_file_path: 'storage/data/urls.txt' # str
10
  expand_urls: True # bool
11
- db_option : 'RAGatouille' # str [FAISS, Chroma, RAGatouille, RAPTOR]
12
  db_path : 'vectorstores' # str
13
  model : 'sentence-transformers/all-MiniLM-L6-v2' # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002']
14
  search_top_k : 3 # int
@@ -39,7 +39,7 @@ llm_params:
39
  filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
40
  model_path: 'storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Path to the model file
41
  stream: False # bool
42
- pdf_reader: 'gpt' # str [llama, pymupdf, gpt]
43
 
44
  chat_logging:
45
  log_chat: True # bool
 
3
  device: 'cpu' # str [cuda, cpu]
4
 
5
  vectorstore:
6
+ load_from_HF: False # bool
7
  reparse_files: True # bool
8
  data_path: 'storage/data' # str
9
  url_file_path: 'storage/data/urls.txt' # str
10
  expand_urls: True # bool
11
+ db_option : 'FAISS' # str [FAISS, Chroma, RAGatouille, RAPTOR]
12
  db_path : 'vectorstores' # str
13
  model : 'sentence-transformers/all-MiniLM-L6-v2' # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002']
14
  search_top_k : 3 # int
 
39
  filename: 'tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Specific name of gguf file in the repo
40
  model_path: 'storage/models/tinyllama-1.1b-chat-v1.0.Q5_0.gguf' # Path to the model file
41
  stream: False # bool
42
+ pdf_reader: 'pymupdf' # str [llama, pymupdf, gpt]
43
 
44
  chat_logging:
45
  log_chat: True # bool