Spaces: eaglesarezzo / Running on Zero

gufett0 committed
Commit 12e1362
Parent: 7d1e086

first files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/* filter=lfs diff=lfs merge=lfs -text
+*.docx filter=lfs diff=lfs merge=lfs -text
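The two new rules route everything under data/ and any .docx file through Git LFS. A rough way to sanity-check which paths a pattern would capture is Python's fnmatch (an approximation only: fnmatch's "*" also crosses "/", unlike git's, and real gitattributes matching has further rules):

from fnmatch import fnmatch

# Hypothetical repo paths to test against the two new LFS patterns.
paths = [
    "data/payment/payment_executive_summary.docx",
    "report.docx",
    "app.py",
]
lfs_patterns = ["data/*", "*.docx"]

for p in paths:
    tracked = any(fnmatch(p, pat) for pat in lfs_patterns)
    print(f"{p}: {'LFS' if tracked else 'regular git'}")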
.gitattributes copy ADDED
@@ -0,0 +1,37 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+metaverse_executive_summary.docx filter=lfs diff=lfs merge=lfs -text
+data/payment/payment_executive_summary.docx filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+appold.py
app.py ADDED
@@ -0,0 +1,186 @@
+import spaces
+import os
+import gradio as gr
+from models import download_models
+from rag_backend import Backend
+from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
+from llama_cpp_agent.providers import LlamaCppPythonProvider
+from llama_cpp_agent.chat_history import BasicChatHistory
+from llama_cpp_agent.chat_history.messages import Roles
+import cv2
+
+# get the models
+huggingface_token = os.environ.get('HF_TOKEN')
+download_models(huggingface_token)
+
+documents_paths = {
+    'blockchain': 'data/blockchain',
+    'metaverse': 'data/metaverse',
+    'payment': 'data/payment'
+}
+
+# initialize backend (not ideal as a global variable...)
+backend = Backend()
+
+cv2.setNumThreads(1)
+
+@spaces.GPU(duration=20)
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    model,
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+    top_k,
+    repeat_penalty,
+):
+    chat_template = MessagesFormatterType.GEMMA_2
+
+    print("HISTORY SO FAR ", history)
+
+    matched_path = None
+    words = message.lower()
+
+    # Look for a topic keyword only during the second interaction,
+    # i.e. when the history holds exactly one entry.
+    for key, path in documents_paths.items():
+        if len(history) == 1 and key in words:
+            matched_path = path
+            break
+    print("matched_path", matched_path)
+
+    if matched_path:  # only true on the second interaction
+        original_message = history[0][0]
+        print("** matched path!!")
+        query_engine = backend.create_index_for_query_engine(matched_path)
+        message = backend.generate_prompt(query_engine, original_message)
+
+        gr.Info("Relevant context indexed from docs...")
+
+    elif (not matched_path) and (len(history) > 1):
+        print("Using context from storage db")
+        query_engine = backend.load_index_for_query_engine()
+        message = backend.generate_prompt(query_engine, message)
+
+        gr.Info("Relevant context extracted from db...")
+
+    # Load the model only if it isn't loaded yet or a different model was selected
+    if backend.llm is None or backend.llm_model != model:
+        try:
+            backend.load_model(model)
+        except Exception as e:
+            # yield (not return) so the error message reaches the UI from this generator
+            yield f"Error loading model: {str(e)}"
+            return
+
+    provider = LlamaCppPythonProvider(backend.llm)
+
+    agent = LlamaCppAgent(
+        provider,
+        system_prompt=system_message,
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True
+    )
+
+    settings = provider.get_provider_default_settings()
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+    settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
+    settings.stream = True
+
+    messages = BasicChatHistory()
+
+    # replay user and assistant turns into the agent's chat history
+    for msn in history:
+        user = {'role': Roles.user, 'content': msn[0]}
+        assistant = {'role': Roles.assistant, 'content': msn[1]}
+        messages.add_message(user)
+        messages.add_message(assistant)
+
+    try:
+        stream = agent.get_chat_response(
+            message,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=True,
+            print_output=False
+        )
+
+        outputs = ""
+        for output in stream:
+            outputs += output
+            yield outputs
+    except Exception as e:
+        yield f"Error during response generation: {str(e)}"
+
+
+demo = gr.ChatInterface(
+    fn=respond,
+    css="""
+    .gradio-container {
+        background-color: #B9D9EB;
+        color: #003366;
+    }""",
+    additional_inputs=[
+        gr.Dropdown([
+                'Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf',
+                'Mistral-Nemo-Instruct-2407-Q5_K_M.gguf',
+                'gemma-2-2b-it-Q6_K_L.gguf',
+                'openchat-3.6-8b-20240522-Q6_K.gguf',
+                'Llama-3-Groq-8B-Tool-Use-Q6_K.gguf',
+                'MiniCPM-V-2_6-Q6_K.gguf',
+                'llama-3.1-storm-8b-q5_k_m.gguf',
+                'orca-2-7b-patent-instruct-llama-2-q5_k_m.gguf'
+            ],
+            value="gemma-2-2b-it-Q6_K_L.gguf",
+            label="Model"
+        ),
+        # System prompt (in Italian): Odi introduces itself once, asks which
+        # Osservatori section the question concerns, then answers only on-topic.
+        gr.Textbox(value="""Solamente all'inizio, presentati come Odi, un assistente ricercatore italiano creato dagli Osservatori del Politecnico di Milano e specializzato nel fornire risposte precise e pertinenti solo ad argomenti di innovazione digitale.
+Solo nella tua prima risposta, chiedi all'utente di indicare a quale di queste tre sezioni degli Osservatori si riferisce la sua domanda: 'Blockchain', 'Payment' o 'Metaverse'.
+Per le risposte successive, utilizza la cronologia della chat o il contesto fornito per aiutare l'utente a ottenere una risposta accurata.
+Non rispondere mai a domande che non sono pertinenti a questi argomenti.""", label="System message"),
+        gr.Slider(minimum=1, maximum=4096, value=3048, step=1, label="Max tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=1.2, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p",
+        ),
+        gr.Slider(
+            minimum=0,
+            maximum=100,
+            value=30,
+            step=1,
+            label="Top-k",
+        ),
+        gr.Slider(
+            minimum=0.0,
+            maximum=2.0,
+            value=1.1,
+            step=0.1,
+            label="Repetition penalty",
+        ),
+    ],
+    retry_btn="Riprova",
+    undo_btn="Annulla",
+    clear_btn="Pulisci",
+    submit_btn="Invia",
+    title="Odi, l'assistente ricercatore degli Osservatori",
+    chatbot=gr.Chatbot(
+        scale=1,
+        likeable=False,
+        show_copy_button=True
+    ),
+    examples=[["Ciao, in cosa puoi aiutarmi?"], ["Quanto vale il mercato italiano?"], ["Per favore dammi informazioni sugli ambiti applicativi"], ["Svelami una buona ricetta milanese"]],
+    cache_examples=False,
+)
+
+if __name__ == "__main__":
+    demo.launch()
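The retrieval routing in respond is turn-based: on the first turn the system prompt makes the model ask the user to pick a section; on the second turn (exactly one history entry) the message is scanned for a topic keyword and a fresh index is built from the matching folder; on later turns the index persisted in ./db is reused. A minimal standalone sketch of that decision, using the same documents_paths mapping (illustrative only):

documents_paths = {
    'blockchain': 'data/blockchain',
    'metaverse': 'data/metaverse',
    'payment': 'data/payment',
}

def route(message: str, history: list) -> str:
    words = message.lower()
    if len(history) == 1:  # second turn: look for a topic keyword
        for key, path in documents_paths.items():
            if key in words:
                return f"build fresh index from {path}"
    if len(history) > 1:   # later turns: reuse the persisted index
        return "load persisted index from ./db"
    return "no retrieval on this turn"

print(route("Mi interessa il metaverse", [("Ciao", "...")]))  # build fresh index from data/metaverse
print(route("E il mercato?", [("a", "b"), ("c", "d")]))       # load persisted index from ./db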
data/metaverse/.DS_Store ADDED
Binary file (6.15 kB).
 
data/metaverse/Glossario_metaverse.docx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:076b383bc64c0c0231fa3549545683d498fb73370fd54fdf3b8ffe9471e4dbb6
+size 31966
data/metaverse/metaverse_executive_summary.docx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:854abbec61fc27fc87669fbcaa6f4a5aafaac93ea715492e775d53a59d091a29
+size 8377251
data/payment/Glossario_payment.docx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9c26b362e192eb74e8a466bb62529d5830da926f675a91605d10dce0145f311
+size 22331
data/payment/payment_executive_summary.docx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7cd5de9017d27fb118612bb5c1d624f7697fb3716272e3a382d9e126216cc1a
+size 4110078
data/payment/paymentprova.txt ADDED
The diff for this file is too large to render.
 
db/.gitignore ADDED
File without changes
models.py ADDED
@@ -0,0 +1,21 @@
+from huggingface_hub import hf_hub_download
+
+def download_models(huggingface_token):
+    # (repo_id, filename) pairs for the GGUF quantizations offered in the UI dropdown
+    models = [
+        ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf"),
+        ("bartowski/Mistral-Nemo-Instruct-2407-GGUF", "Mistral-Nemo-Instruct-2407-Q5_K_M.gguf"),
+        ("bartowski/gemma-2-2b-it-GGUF", "gemma-2-2b-it-Q6_K_L.gguf"),
+        ("bartowski/openchat-3.6-8b-20240522-GGUF", "openchat-3.6-8b-20240522-Q6_K.gguf"),
+        ("bartowski/Llama-3-Groq-8B-Tool-Use-GGUF", "Llama-3-Groq-8B-Tool-Use-Q6_K.gguf"),
+        ("bartowski/MiniCPM-V-2_6-GGUF", "MiniCPM-V-2_6-Q6_K.gguf"),
+        ("CaioXapelaum/Llama-3.1-Storm-8B-Q5_K_M-GGUF", "llama-3.1-storm-8b-q5_k_m.gguf"),
+        ("CaioXapelaum/Orca-2-7b-Patent-Instruct-Llama-2-Q5_K_M-GGUF", "orca-2-7b-patent-instruct-llama-2-q5_k_m.gguf"),
+    ]
+
+    # fetch each file into ./models so Backend.load_model can find it
+    for repo_id, filename in models:
+        hf_hub_download(
+            repo_id=repo_id,
+            filename=filename,
+            local_dir="./models",
+            token=huggingface_token
+        )
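download_models runs once at start-up, before the Gradio UI is built, and drops every file into ./models, the models/{model_path} location Backend.load_model expects. A minimal sketch of calling it locally (assumes HF_TOKEN is set in the environment; already-downloaded files are reused by hf_hub_download, but the eight GGUF files together total tens of gigabytes):

import os
from models import download_models

download_models(os.environ.get("HF_TOKEN"))
print(sorted(os.listdir("./models")))  # should list the eight .gguf files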
rag_backend.py ADDED
@@ -0,0 +1,63 @@
+import os
+from llama_cpp import Llama
+from llama_index.core import VectorStoreIndex, Settings, SimpleDirectoryReader, load_index_from_storage, StorageContext
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+# Disable llama_index's default LLM: the index is used for retrieval only;
+# generation is handled by llama.cpp in app.py.
+Settings.llm = None
+
+class Backend:
+    def __init__(self):
+        self.llm = None
+        self.llm_model = None
+        self.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+        self.PERSIST_DIR = "./db"
+        os.makedirs(self.PERSIST_DIR, exist_ok=True)
+
+    def load_model(self, model_path):
+        self.llm = Llama(
+            model_path=f"models/{model_path}",
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=8192,
+        )
+        self.llm_model = model_path
+
+    def create_index_for_query_engine(self, matched_path):
+        # Build a fresh vector index from the documents in the matched folder,
+        # persist it to disk, and return a query engine over it.
+        documents = SimpleDirectoryReader(input_dir=matched_path).load_data()
+        nodes = SentenceSplitter(chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n").get_nodes_from_documents(documents)
+        index = VectorStoreIndex(nodes, embed_model=self.embed_model)
+        query_engine = index.as_query_engine(
+            similarity_top_k=4, response_mode="tree_summarize"
+        )
+        index.storage_context.persist(persist_dir=self.PERSIST_DIR)
+
+        return query_engine
+
+    # here we're leveraging the vector index previously persisted to PERSIST_DIR
+    def load_index_for_query_engine(self):
+        storage_context = StorageContext.from_defaults(persist_dir=self.PERSIST_DIR)
+        index = load_index_from_storage(storage_context, embed_model=self.embed_model)
+
+        query_engine = index.as_query_engine(
+            similarity_top_k=4, response_mode="tree_summarize"
+        )
+        return query_engine
+
+    def generate_prompt(self, query_engine, message):
+        # Retrieve the top chunks for the message and inline them into the prompt.
+        relevant_chunks = query_engine.retrieve(message)
+        print(f"Found: {len(relevant_chunks)} relevant chunks")
+
+        prompt = "Considera questo come tua base di conoscenza personale:\n==========Conoscenza===========\n"
+        for idx, chunk in enumerate(relevant_chunks):
+            print(f"{idx + 1}) {chunk.text[:64]}...")
+            prompt += chunk.text + "\n\n"
+        prompt += "\n======================\nDomanda: " + message
+        return prompt
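End to end, the backend is used in two steps: obtain a query engine (freshly built, or reloaded from disk), then turn the user's message into a context-stuffed prompt for the LLM. A minimal sketch, assuming the data/payment folder exists and the embedding model has been downloaded:

from rag_backend import Backend

backend = Backend()

# First question on a topic: embed the folder and persist the index to ./db ...
engine = backend.create_index_for_query_engine("data/payment")
prompt = backend.generate_prompt(engine, "Quanto vale il mercato italiano?")

# ... later questions reload the persisted index instead of re-embedding.
engine = backend.load_index_for_query_engine()
prompt = backend.generate_prompt(engine, "Quali sono gli ambiti applicativi?")
print(prompt[:200])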
requirements.txt ADDED
@@ -0,0 +1,10 @@
+spaces
+huggingface_hub
+scikit-build-core
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.90-cu124/llama_cpp_python-0.2.90-cp310-cp310-linux_x86_64.whl
+git+https://github.com/Maximilian-Winter/llama-cpp-agent
+opencv-python
+llama-index
+llama-index-embeddings-huggingface
+llama-index-embeddings-instructor
+docx2txt
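The llama-cpp-python line pins a prebuilt wheel whose filename encodes its constraints: version 0.2.90, CUDA 12.4 (cu124), CPython 3.10 (cp310) on x86-64 Linux; on any other interpreter or platform pip will reject the URL. A quick post-install sanity check (a sketch):

import sys
import llama_cpp

# The pinned wheel only matches CPython 3.10 on linux/x86_64 with CUDA 12.4.
assert sys.version_info[:2] == (3, 10), "wheel was built for CPython 3.10"
print("llama-cpp-python", llama_cpp.__version__)  # expect 0.2.90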