import os

from llama_cpp import Llama
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Disable LlamaIndex's default LLM: indexing and retrieval only need the
# embedding model; text generation is handled directly by llama-cpp-python.
Settings.llm = None
class Backend:
    def __init__(self):
        # The llama.cpp model is loaded lazily via load_model().
        self.llm = None
        self.llm_model = None
        # Embedding model used for both building and querying the index.
        self.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
        self.PERSIST_DIR = "./db"
        os.makedirs(self.PERSIST_DIR, exist_ok=True)
    def load_model(self, model_path):
        # Load a GGUF model from the local "models" directory, offloading
        # all layers to the GPU with flash attention enabled.
        self.llm = Llama(
            model_path=f"models/{model_path}",
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        self.llm_model = model_path
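
    # Illustrative sketch, not part of the original file: once load_model()
    # has run, llama-cpp-python's OpenAI-style chat API can stream an answer
    # for a prompt built by generate_prompt(). The method name stream_answer
    # is an assumption introduced here for illustration.
    def stream_answer(self, prompt):
        stream = self.llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            stream=True,
        )
        for chunk in stream:
            delta = chunk["choices"][0]["delta"]
            # Each streamed chunk may carry a "content" fragment.
            if "content" in delta:
                yield delta["content"]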
    def create_index_for_query_engine(self, matched_path):
        # Read every document under matched_path, split it into overlapping
        # sentence chunks, and embed the chunks into a fresh vector index.
        documents = SimpleDirectoryReader(input_dir=matched_path).load_data()
        nodes = SentenceSplitter(
            chunk_size=256, chunk_overlap=64, paragraph_separator="\n\n"
        ).get_nodes_from_documents(documents)
        index = VectorStoreIndex(nodes, embed_model=self.embed_model)
        query_engine = index.as_query_engine(
            similarity_top_k=4, response_mode="tree_summarize"
        )
        # Persist the index so load_index_for_query_engine() can reuse it.
        index.storage_context.persist(persist_dir=self.PERSIST_DIR)
        return query_engine
    # Here we reload an index previously built and persisted to PERSIST_DIR
    # (LlamaIndex's default on-disk vector store), skipping re-embedding.
    def load_index_for_query_engine(self):
        storage_context = StorageContext.from_defaults(persist_dir=self.PERSIST_DIR)
        index = load_index_from_storage(storage_context, embed_model=self.embed_model)
        query_engine = index.as_query_engine(
            similarity_top_k=4, response_mode="tree_summarize"
        )
        return query_engine
    def generate_prompt(self, query_engine, message):
        # Retrieve the chunks most similar to the user message and splice
        # them into the prompt as an explicit knowledge base.
        relevant_chunks = query_engine.retrieve(message)
        print(f"Found: {len(relevant_chunks)} relevant chunks")
        prompt = "Consider this your personal knowledge base:\n==========Knowledge===========\n"
        for idx, chunk in enumerate(relevant_chunks):
            print(f"{idx + 1}) {chunk.text[:64]}...")
            prompt += chunk.text + "\n\n"
        prompt += "\n======================\nQuestion: " + message
        return prompt
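
# Minimal usage sketch under assumed paths: a GGUF file named "model.gguf"
# under ./models and a document folder ./docs, neither of which comes from
# the original file. stream_answer is the illustrative method sketched above.
if __name__ == "__main__":
    backend = Backend()
    backend.load_model("model.gguf")
    # Build the index on first run; later runs could instead reload the
    # persisted copy via backend.load_index_for_query_engine().
    query_engine = backend.create_index_for_query_engine("./docs")
    prompt = backend.generate_prompt(query_engine, "What do these documents cover?")
    for token in backend.stream_answer(prompt):
        print(token, end="", flush=True)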